AMDGPU/GlobalISel: Fix splitting 64-bit extensions
This was replicating the low bits into the high bits for G_ZEXT, rather than using 0.
This commit is contained in:
		
							parent
							
								
									7c3c5b11b1
								
							
						
					
					
						commit
						e8f6b0e583
					
				| 
						 | 
					@ -1778,6 +1778,34 @@ static void reinsertVectorIndexAdd(MachineIRBuilder &B,
 | 
				
			||||||
  IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
 | 
					  IdxUseInstr.getOperand(OpIdx).setReg(Add.getReg(0));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Implement extending a 32-bit value to a 64-bit value. \p Lo32Reg is the
 | 
				
			||||||
 | 
					/// original 32-bit source value (to be inserted in the low part of the combined
 | 
				
			||||||
 | 
					/// 64-bit result), and \p Hi32Reg is the high half of the combined 64-bit
 | 
				
			||||||
 | 
					/// value.
 | 
				
			||||||
 | 
					static void extendLow32IntoHigh32(MachineIRBuilder &B,
 | 
				
			||||||
 | 
					                                  Register Hi32Reg, Register Lo32Reg,
 | 
				
			||||||
 | 
					                                  unsigned ExtOpc,
 | 
				
			||||||
 | 
					                                  const RegisterBank &RegBank,
 | 
				
			||||||
 | 
					                                  bool IsBooleanSrc = false) {
 | 
				
			||||||
 | 
					  if (ExtOpc == AMDGPU::G_ZEXT) {
 | 
				
			||||||
 | 
					    B.buildConstant(Hi32Reg, 0);
 | 
				
			||||||
 | 
					  } else if (ExtOpc == AMDGPU::G_SEXT) {
 | 
				
			||||||
 | 
					    if (IsBooleanSrc) {
 | 
				
			||||||
 | 
					      // If we know the original source was an s1, the high half is the same as
 | 
				
			||||||
 | 
					      // the low.
 | 
				
			||||||
 | 
					      B.buildCopy(Hi32Reg, Lo32Reg);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      // Replicate sign bit from 32-bit extended part.
 | 
				
			||||||
 | 
					      auto ShiftAmt = B.buildConstant(LLT::scalar(32), 31);
 | 
				
			||||||
 | 
					      B.getMRI()->setRegBank(ShiftAmt.getReg(0), RegBank);
 | 
				
			||||||
 | 
					      B.buildAShr(Hi32Reg, Lo32Reg, ShiftAmt);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  } else {
 | 
				
			||||||
 | 
					    assert(ExtOpc == AMDGPU::G_ANYEXT && "not an integer extension");
 | 
				
			||||||
 | 
					    B.buildUndef(Hi32Reg);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void AMDGPURegisterBankInfo::applyMappingImpl(
 | 
					void AMDGPURegisterBankInfo::applyMappingImpl(
 | 
				
			||||||
    const OperandsMapper &OpdMapper) const {
 | 
					    const OperandsMapper &OpdMapper) const {
 | 
				
			||||||
  MachineInstr &MI = OpdMapper.getMI();
 | 
					  MachineInstr &MI = OpdMapper.getMI();
 | 
				
			||||||
| 
						 | 
					@ -2237,26 +2265,19 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
 | 
				
			||||||
        // breakdowns supported.
 | 
					        // breakdowns supported.
 | 
				
			||||||
        DstTy.getSizeInBits() == 64 &&
 | 
					        DstTy.getSizeInBits() == 64 &&
 | 
				
			||||||
        SrcTy.getSizeInBits() <= 32) {
 | 
					        SrcTy.getSizeInBits() <= 32) {
 | 
				
			||||||
      const LLT S32 = LLT::scalar(32);
 | 
					 | 
				
			||||||
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
 | 
					      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Extend to 32-bit, and then extend the low half.
 | 
					      // Extend to 32-bit, and then extend the low half.
 | 
				
			||||||
      if (Signed) {
 | 
					      if (Signed) {
 | 
				
			||||||
        // TODO: Should really be buildSExtOrCopy
 | 
					        // TODO: Should really be buildSExtOrCopy
 | 
				
			||||||
        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
 | 
					        B.buildSExtOrTrunc(DefRegs[0], SrcReg);
 | 
				
			||||||
 | 
					 | 
				
			||||||
        // Replicate sign bit from 32-bit extended part.
 | 
					 | 
				
			||||||
        auto ShiftAmt = B.buildConstant(S32, 31);
 | 
					 | 
				
			||||||
        MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
 | 
					 | 
				
			||||||
        B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
 | 
					 | 
				
			||||||
      } else if (Opc == AMDGPU::G_ZEXT) {
 | 
					      } else if (Opc == AMDGPU::G_ZEXT) {
 | 
				
			||||||
        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
 | 
					        B.buildZExtOrTrunc(DefRegs[0], SrcReg);
 | 
				
			||||||
        B.buildConstant(DefRegs[1], 0);
 | 
					 | 
				
			||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
        B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
 | 
					        B.buildAnyExtOrTrunc(DefRegs[0], SrcReg);
 | 
				
			||||||
        B.buildUndef(DefRegs[1]);
 | 
					 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank);
 | 
				
			||||||
      MRI.setRegBank(DstReg, *SrcBank);
 | 
					      MRI.setRegBank(DstReg, *SrcBank);
 | 
				
			||||||
      MI.eraseFromParent();
 | 
					      MI.eraseFromParent();
 | 
				
			||||||
      return;
 | 
					      return;
 | 
				
			||||||
| 
						 | 
					@ -2266,7 +2287,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
 | 
				
			||||||
      return;
 | 
					      return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // It is not legal to have a legalization artifact with a VCC source. Rather
 | 
					    // It is not legal to have a legalization artifact with a VCC source. Rather
 | 
				
			||||||
    // than introducing a copy, insert the selcet we would have to select the
 | 
					    // than introducing a copy, insert the select we would have to select the
 | 
				
			||||||
    // copy to.
 | 
					    // copy to.
 | 
				
			||||||
    if (SrcBank == &AMDGPU::VCCRegBank) {
 | 
					    if (SrcBank == &AMDGPU::VCCRegBank) {
 | 
				
			||||||
      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
 | 
					      SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
 | 
				
			||||||
| 
						 | 
					@ -2289,7 +2310,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (DstSize > 32) {
 | 
					      if (DstSize > 32) {
 | 
				
			||||||
        B.buildSelect(DefRegs[0], SrcReg, True, False);
 | 
					        B.buildSelect(DefRegs[0], SrcReg, True, False);
 | 
				
			||||||
        B.buildCopy(DefRegs[1], DefRegs[0]);
 | 
					        extendLow32IntoHigh32(B, DefRegs[1], DefRegs[0], Opc, *SrcBank, true);
 | 
				
			||||||
      } else if (DstSize < 32) {
 | 
					      } else if (DstSize < 32) {
 | 
				
			||||||
        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
 | 
					        auto Sel = B.buildSelect(SelType, SrcReg, True, False);
 | 
				
			||||||
        MRI.setRegBank(Sel.getReg(0), *DstBank);
 | 
					        MRI.setRegBank(Sel.getReg(0), *DstBank);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -144,8 +144,8 @@ body: |
 | 
				
			||||||
    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
 | 
					    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
 | 
				
			||||||
    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 | 
					    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 | 
				
			||||||
    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
 | 
					    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
 | 
				
			||||||
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
 | 
					    ; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
 | 
				
			||||||
    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
 | 
					    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[DEF]](s32)
 | 
				
			||||||
    %0:_(s32) = COPY $vgpr0
 | 
					    %0:_(s32) = COPY $vgpr0
 | 
				
			||||||
    %1:_(s32) = COPY $vgpr1
 | 
					    %1:_(s32) = COPY $vgpr1
 | 
				
			||||||
    %2:_(s1) = G_ICMP intpred(eq), %0, %1
 | 
					    %2:_(s1) = G_ICMP intpred(eq), %0, %1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -160,8 +160,8 @@ body: |
 | 
				
			||||||
    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
 | 
					    ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
 | 
				
			||||||
    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 | 
					    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 | 
				
			||||||
    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
 | 
					    ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
 | 
				
			||||||
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
 | 
					    ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
 | 
				
			||||||
    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
 | 
					    ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C2]](s32)
 | 
				
			||||||
    %0:_(s32) = COPY $vgpr0
 | 
					    %0:_(s32) = COPY $vgpr0
 | 
				
			||||||
    %1:_(s32) = COPY $vgpr1
 | 
					    %1:_(s32) = COPY $vgpr1
 | 
				
			||||||
    %2:_(s1) = G_ICMP intpred(eq), %0, %1
 | 
					    %2:_(s1) = G_ICMP intpred(eq), %0, %1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue