forked from OSchip/llvm-project
				
			AVX-512: optimized scalar compare patterns
removed AVX512SI format, since it is similar to AVX512BI. llvm-svn: 199217
This commit is contained in:
		
							parent
							
								
									6e53cfc1ac
								
							
						
					
					
						commit
						767fc967b4
					
				| 
						 | 
					@ -10235,8 +10235,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 | 
				
			||||||
      if (!Invert) return Op0;
 | 
					      if (!Invert) return Op0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      CCode = X86::GetOppositeBranchCondition(CCode);
 | 
					      CCode = X86::GetOppositeBranchCondition(CCode);
 | 
				
			||||||
      return DAG.getNode(X86ISD::SETCC, dl, VT,
 | 
					      SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
 | 
				
			||||||
                         DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
 | 
					                         DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
 | 
				
			||||||
 | 
					      if (VT == MVT::i1)
 | 
				
			||||||
 | 
					        return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
 | 
				
			||||||
 | 
					      return SetCC;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10247,8 +10250,11 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
 | 
					  SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
 | 
				
			||||||
  EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
 | 
					  EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
 | 
				
			||||||
  return DAG.getNode(X86ISD::SETCC, dl, VT,
 | 
					  SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
 | 
				
			||||||
                      DAG.getConstant(X86CC, MVT::i8), EFLAGS);
 | 
					                      DAG.getConstant(X86CC, MVT::i8), EFLAGS);
 | 
				
			||||||
 | 
					  if (VT == MVT::i1)
 | 
				
			||||||
 | 
					    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
 | 
				
			||||||
 | 
					  return SetCC;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
 | 
					// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
 | 
				
			||||||
| 
						 | 
					@ -17696,10 +17702,11 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
 | 
				
			||||||
          // See X86ATTInstPrinter.cpp:printSSECC().
 | 
					          // See X86ATTInstPrinter.cpp:printSSECC().
 | 
				
			||||||
          unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
 | 
					          unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
 | 
				
			||||||
          if (Subtarget->hasAVX512()) {
 | 
					          if (Subtarget->hasAVX512()) {
 | 
				
			||||||
            // SETCC type in AVX-512 is MVT::i1
 | 
					            SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, CMP01,
 | 
				
			||||||
            assert(N->getValueType(0) == MVT::i1 && "Unexpected AND node type");
 | 
					 | 
				
			||||||
            return DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, CMP01,
 | 
					 | 
				
			||||||
                               DAG.getConstant(x86cc, MVT::i8));
 | 
					                               DAG.getConstant(x86cc, MVT::i8));
 | 
				
			||||||
 | 
					            if (N->getValueType(0) != MVT::i1)
 | 
				
			||||||
 | 
					              return DAG.getNode(ISD::ZERO_EXTEND, DL, N->getValueType(0), FSetCC);
 | 
				
			||||||
 | 
					            return FSetCC;
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
          SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01,
 | 
					          SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, CMP01,
 | 
				
			||||||
                                              DAG.getConstant(x86cc, MVT::i8));
 | 
					                                              DAG.getConstant(x86cc, MVT::i8));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1356,32 +1356,32 @@ def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src1),
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
// Move Int Doubleword to Packed Double Int
 | 
					// Move Int Doubleword to Packed Double Int
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
def VMOVDI2PDIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
 | 
					def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
 | 
				
			||||||
                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(set VR128X:$dst,
 | 
					                      [(set VR128X:$dst,
 | 
				
			||||||
                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
 | 
					                        (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
 | 
				
			||||||
                        EVEX, VEX_LIG;
 | 
					                        EVEX, VEX_LIG;
 | 
				
			||||||
def VMOVDI2PDIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
 | 
					def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
 | 
				
			||||||
                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(set VR128X:$dst,
 | 
					                      [(set VR128X:$dst,
 | 
				
			||||||
                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
 | 
					                        (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
 | 
				
			||||||
                        IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 | 
					                        IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 | 
				
			||||||
def VMOV64toPQIZrr : AVX512SI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
 | 
					def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
 | 
				
			||||||
                      "vmovq\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovq\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                        [(set VR128X:$dst,
 | 
					                        [(set VR128X:$dst,
 | 
				
			||||||
                          (v2i64 (scalar_to_vector GR64:$src)))],
 | 
					                          (v2i64 (scalar_to_vector GR64:$src)))],
 | 
				
			||||||
                          IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
 | 
					                          IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG;
 | 
				
			||||||
let isCodeGenOnly = 1 in {
 | 
					let isCodeGenOnly = 1 in {
 | 
				
			||||||
def VMOV64toSDZrr : AVX512SI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
 | 
					def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
 | 
				
			||||||
                       "vmovq\t{$src, $dst|$dst, $src}",
 | 
					                       "vmovq\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                       [(set FR64:$dst, (bitconvert GR64:$src))],
 | 
					                       [(set FR64:$dst, (bitconvert GR64:$src))],
 | 
				
			||||||
                       IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
 | 
					                       IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
 | 
				
			||||||
def VMOVSDto64Zrr : AVX512SI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
 | 
					def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
 | 
				
			||||||
                         "vmovq\t{$src, $dst|$dst, $src}",
 | 
					                         "vmovq\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                         [(set GR64:$dst, (bitconvert FR64:$src))],
 | 
					                         [(set GR64:$dst, (bitconvert FR64:$src))],
 | 
				
			||||||
                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
 | 
					                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
 | 
					def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
 | 
				
			||||||
                         "vmovq\t{$src, $dst|$dst, $src}",
 | 
					                         "vmovq\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
 | 
					                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
 | 
				
			||||||
                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
 | 
					                         IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
 | 
				
			||||||
| 
						 | 
					@ -1390,32 +1390,32 @@ def VMOVSDto64Zmr : AVX512SI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$s
 | 
				
			||||||
// Move Int Doubleword to Single Scalar
 | 
					// Move Int Doubleword to Single Scalar
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
let isCodeGenOnly = 1 in {
 | 
					let isCodeGenOnly = 1 in {
 | 
				
			||||||
def VMOVDI2SSZrr  : AVX512SI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
 | 
					def VMOVDI2SSZrr  : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
 | 
				
			||||||
                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(set FR32X:$dst, (bitconvert GR32:$src))],
 | 
					                      [(set FR32X:$dst, (bitconvert GR32:$src))],
 | 
				
			||||||
                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
 | 
					                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VMOVDI2SSZrm  : AVX512SI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
 | 
					def VMOVDI2SSZrm  : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
 | 
				
			||||||
                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
 | 
					                      [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
 | 
				
			||||||
                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 | 
					                      IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Move Packed Doubleword Int to Packed Double Int
 | 
					// Move doubleword from xmm register to r/m32
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
def VMOVPDI2DIZrr  : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
 | 
					def VMOVPDI2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
 | 
				
			||||||
                       "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                       "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                       [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
 | 
					                       [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src),
 | 
				
			||||||
                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
 | 
					                                        (iPTR 0)))], IIC_SSE_MOVD_ToGP>,
 | 
				
			||||||
                       EVEX, VEX_LIG;
 | 
					                       EVEX, VEX_LIG;
 | 
				
			||||||
def VMOVPDI2DIZmr  : AVX512SI<0x7E, MRMDestMem, (outs),
 | 
					def VMOVPDI2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
 | 
				
			||||||
                       (ins i32mem:$dst, VR128X:$src),
 | 
					                       (ins i32mem:$dst, VR128X:$src),
 | 
				
			||||||
                       "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                       "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                       [(store (i32 (vector_extract (v4i32 VR128X:$src),
 | 
					                       [(store (i32 (vector_extract (v4i32 VR128X:$src),
 | 
				
			||||||
                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
 | 
					                                     (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
 | 
				
			||||||
                       EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 | 
					                       EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Move Packed Doubleword Int first element to Doubleword Int
 | 
					// Move quadword from xmm1 register to r/m64
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
 | 
					def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
 | 
				
			||||||
                      "vmovq\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovq\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
| 
						 | 
					@ -1435,12 +1435,12 @@ def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs),
 | 
				
			||||||
// Move Scalar Single to Double Int
 | 
					// Move Scalar Single to Double Int
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
let isCodeGenOnly = 1 in {
 | 
					let isCodeGenOnly = 1 in {
 | 
				
			||||||
def VMOVSS2DIZrr  : AVX512SI<0x7E, MRMDestReg, (outs GR32:$dst),
 | 
					def VMOVSS2DIZrr  : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
 | 
				
			||||||
                      (ins FR32X:$src),
 | 
					                      (ins FR32X:$src),
 | 
				
			||||||
                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(set GR32:$dst, (bitconvert FR32X:$src))],
 | 
					                      [(set GR32:$dst, (bitconvert FR32X:$src))],
 | 
				
			||||||
                      IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
 | 
					                      IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG;
 | 
				
			||||||
def VMOVSS2DIZmr  : AVX512SI<0x7E, MRMDestMem, (outs),
 | 
					def VMOVSS2DIZmr  : AVX512BI<0x7E, MRMDestMem, (outs),
 | 
				
			||||||
                      (ins i32mem:$dst, FR32X:$src),
 | 
					                      (ins i32mem:$dst, FR32X:$src),
 | 
				
			||||||
                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovd\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
 | 
					                      [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
 | 
				
			||||||
| 
						 | 
					@ -1449,7 +1449,7 @@ def VMOVSS2DIZmr  : AVX512SI<0x7E, MRMDestMem, (outs),
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Move Quadword Int to Packed Quadword Int
 | 
					// Move Quadword Int to Packed Quadword Int
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
def VMOVQI2PQIZrm : AVX512SI<0x6E, MRMSrcMem, (outs VR128X:$dst),
 | 
					def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
 | 
				
			||||||
                      (ins i64mem:$src),
 | 
					                      (ins i64mem:$src),
 | 
				
			||||||
                      "vmovq\t{$src, $dst|$dst, $src}",
 | 
					                      "vmovq\t{$src, $dst|$dst, $src}",
 | 
				
			||||||
                      [(set VR128X:$dst,
 | 
					                      [(set VR128X:$dst,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -644,10 +644,6 @@ class AVX512BIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 | 
				
			||||||
              list<dag> pattern, InstrItinClass itin = NoItinerary>
 | 
					              list<dag> pattern, InstrItinClass itin = NoItinerary>
 | 
				
			||||||
      : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
 | 
					      : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
 | 
				
			||||||
        Requires<[HasAVX512]>;
 | 
					        Requires<[HasAVX512]>;
 | 
				
			||||||
class AVX512SI<bits<8> o, Format F, dag outs, dag ins, string asm,
 | 
					 | 
				
			||||||
            list<dag> pattern, InstrItinClass itin = NoItinerary>
 | 
					 | 
				
			||||||
      : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, PD,
 | 
					 | 
				
			||||||
        Requires<[HasAVX512]>;
 | 
					 | 
				
			||||||
class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 | 
					class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
 | 
				
			||||||
              list<dag> pattern, InstrItinClass itin = NoItinerary>
 | 
					              list<dag> pattern, InstrItinClass itin = NoItinerary>
 | 
				
			||||||
      : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
 | 
					      : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -73,4 +73,26 @@ if.end:                                           ; preds = %entry
 | 
				
			||||||
return:                                           ; preds = %if.end, %entry
 | 
					return:                                           ; preds = %if.end, %entry
 | 
				
			||||||
  %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
 | 
					  %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
 | 
				
			||||||
  ret float %retval.0
 | 
					  ret float %retval.0
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; CHECK-LABEL: test6
 | 
				
			||||||
 | 
					; CHECK: cmpl
 | 
				
			||||||
 | 
					; CHECK-NOT: kmov
 | 
				
			||||||
 | 
					; CHECK: ret
 | 
				
			||||||
 | 
					define i32 @test6(i32 %a, i32 %b) {
 | 
				
			||||||
 | 
					  %cmp = icmp eq i32 %a, %b
 | 
				
			||||||
 | 
					  %res = zext i1 %cmp to i32
 | 
				
			||||||
 | 
					  ret i32 %res
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; CHECK-LABEL: test7
 | 
				
			||||||
 | 
					; CHECK: vucomisd
 | 
				
			||||||
 | 
					; CHECK-NOT: kmov
 | 
				
			||||||
 | 
					; CHECK: ret
 | 
				
			||||||
 | 
					define i32 @test7(double %x, double %y) #2 {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = fcmp one double %x, %y
 | 
				
			||||||
 | 
					  %or = zext i1 %0 to i32
 | 
				
			||||||
 | 
					  ret i32 %or
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue