forked from OSchip/llvm-project
				
			AVX-512: Fixed extract_vector_elt for v8i1 vector
llvm-svn: 202624
This commit is contained in:
		
							parent
							
								
									03b4e3a8cb
								
							
						
					
					
						commit
						9737e3886b
					
				| 
						 | 
					@ -7697,7 +7697,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Extract one bit from mask vector, like v16i1 or v8i1.
 | 
					/// Extract one bit from mask vector, like v16i1 or v8i1.
 | 
				
			||||||
/// AVX-512 feature.
 | 
					/// AVX-512 feature.
 | 
				
			||||||
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) {
 | 
					SDValue
 | 
				
			||||||
 | 
					X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const {
 | 
				
			||||||
  SDValue Vec = Op.getOperand(0);
 | 
					  SDValue Vec = Op.getOperand(0);
 | 
				
			||||||
  SDLoc dl(Vec);
 | 
					  SDLoc dl(Vec);
 | 
				
			||||||
  MVT VecVT = Vec.getSimpleValueType();
 | 
					  MVT VecVT = Vec.getSimpleValueType();
 | 
				
			||||||
| 
						 | 
					@ -7717,7 +7718,8 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) {
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
 | 
					  unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
 | 
				
			||||||
  unsigned MaxSift = VecVT.getSizeInBits() - 1;
 | 
					  const TargetRegisterClass* rc = getRegClassFor(VecVT);
 | 
				
			||||||
 | 
					  unsigned MaxSift = rc->getSize()*8 - 1;
 | 
				
			||||||
  Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
 | 
					  Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
 | 
				
			||||||
                    DAG.getConstant(MaxSift - IdxVal, MVT::i8));
 | 
					                    DAG.getConstant(MaxSift - IdxVal, MVT::i8));
 | 
				
			||||||
  Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
 | 
					  Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -867,6 +867,7 @@ namespace llvm {
 | 
				
			||||||
    SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
 | 
					    SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 | 
					    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 | 
					    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
 | 
					    SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 | 
					    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
 | 
					    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
 | 
					    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1213,6 +1213,11 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))),
 | 
				
			||||||
def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
 | 
					def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))),
 | 
				
			||||||
          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
 | 
					          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
 | 
				
			||||||
 | 
					          (v8i1 (COPY_TO_REGCLASS (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def : Pat<(v8i1 (X86vsrli VK8:$src, (i8 imm:$imm))),
 | 
				
			||||||
 | 
					          (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri (COPY_TO_REGCLASS VK8:$src, VK16), (I8Imm $imm)), VK8))>;
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
// AVX-512 - Aligned and unaligned load and store
 | 
					// AVX-512 - Aligned and unaligned load and store
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -466,10 +466,10 @@ def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
 | 
				
			||||||
// The size of all the mask registers is 16 bits because we have only one
 | 
					// The size of all the mask registers is 16 bits because we have only one
 | 
				
			||||||
// KMOVW instruction that can store this register in memory, and it writes 2 bytes
 | 
					// KMOVW instruction that can store this register in memory, and it writes 2 bytes
 | 
				
			||||||
def VK1     : RegisterClass<"X86", [i1],    16, (sequence "K%u", 0, 7)>;
 | 
					def VK1     : RegisterClass<"X86", [i1],    16, (sequence "K%u", 0, 7)>;
 | 
				
			||||||
def VK8     : RegisterClass<"X86", [v8i1],  16, (sequence "K%u", 0, 7)>;
 | 
					def VK8     : RegisterClass<"X86", [v8i1],  16, (add VK1)> {let Size = 16;}
 | 
				
			||||||
def VK16    : RegisterClass<"X86", [v16i1], 16, (add VK8)>;
 | 
					def VK16    : RegisterClass<"X86", [v16i1], 16, (add VK8)> {let Size = 16;}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def VK1WM   : RegisterClass<"X86", [i1],    16, (sub VK1, K0)>;
 | 
					def VK1WM   : RegisterClass<"X86", [i1],    16, (sub VK1, K0)> {let Size = 16;}
 | 
				
			||||||
def VK8WM   : RegisterClass<"X86", [v8i1],  16, (sub VK8, K0)>;
 | 
					def VK8WM   : RegisterClass<"X86", [v8i1],  16, (sub VK8, K0)> {let Size = 16;}
 | 
				
			||||||
def VK16WM  : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
 | 
					def VK16WM  : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -91,7 +91,7 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind {
 | 
				
			||||||
;CHECK-LABEL: test10
 | 
					;CHECK-LABEL: test10
 | 
				
			||||||
;CHECK: vmovd
 | 
					;CHECK: vmovd
 | 
				
			||||||
;CHECK: vpermd %zmm
 | 
					;CHECK: vpermd %zmm
 | 
				
			||||||
;CHEKK: vmovdz  %xmm0, %eax
 | 
					;CHECK: vmovd  %xmm0, %eax
 | 
				
			||||||
;CHECK: ret
 | 
					;CHECK: ret
 | 
				
			||||||
define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
 | 
					define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
 | 
				
			||||||
  %e = extractelement <16 x i32> %x, i32 %ind
 | 
					  %e = extractelement <16 x i32> %x, i32 %ind
 | 
				
			||||||
| 
						 | 
					@ -100,8 +100,8 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;CHECK-LABEL: test11
 | 
					;CHECK-LABEL: test11
 | 
				
			||||||
;CHECK: vpcmpltud
 | 
					;CHECK: vpcmpltud
 | 
				
			||||||
;CKECK: kshiftlw $11
 | 
					;CHECK: kshiftlw $11
 | 
				
			||||||
;CKECK: kshiftrw $15
 | 
					;CHECK: kshiftrw $15
 | 
				
			||||||
;CHECK: kortestw
 | 
					;CHECK: kortestw
 | 
				
			||||||
;CHECK: je
 | 
					;CHECK: je
 | 
				
			||||||
;CHECK: ret
 | 
					;CHECK: ret
 | 
				
			||||||
| 
						 | 
					@ -119,8 +119,8 @@ define <16 x i32> @test11(<16 x i32>%a, <16 x i32>%b) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;CHECK-LABEL: test12
 | 
					;CHECK-LABEL: test12
 | 
				
			||||||
;CHECK: vpcmpgtq
 | 
					;CHECK: vpcmpgtq
 | 
				
			||||||
;CKECK: kshiftlw $15
 | 
					;CHECK: kshiftlw $15
 | 
				
			||||||
;CKECK: kshiftrw $15
 | 
					;CHECK: kshiftrw $15
 | 
				
			||||||
;CHECK: kortestw
 | 
					;CHECK: kortestw
 | 
				
			||||||
;CHECK: ret
 | 
					;CHECK: ret
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -135,7 +135,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
 | 
				
			||||||
;CHECK-LABEL: test13
 | 
					;CHECK-LABEL: test13
 | 
				
			||||||
;CHECK: cmpl
 | 
					;CHECK: cmpl
 | 
				
			||||||
;CHECK: sbbl
 | 
					;CHECK: sbbl
 | 
				
			||||||
;CKECK: orl $65532
 | 
					;CHECK: orl $65532
 | 
				
			||||||
;CHECK: ret
 | 
					;CHECK: ret
 | 
				
			||||||
define i16 @test13(i32 %a, i32 %b) {
 | 
					define i16 @test13(i32 %a, i32 %b) {
 | 
				
			||||||
  %cmp_res = icmp ult i32 %a, %b
 | 
					  %cmp_res = icmp ult i32 %a, %b
 | 
				
			||||||
| 
						 | 
					@ -144,5 +144,17 @@ define i16 @test13(i32 %a, i32 %b) {
 | 
				
			||||||
  ret i16 %res
 | 
					  ret i16 %res
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					;CHECK-LABEL: test14
 | 
				
			||||||
 | 
					;CHECK: vpcmpgtq
 | 
				
			||||||
 | 
					;CHECK: kshiftlw $11
 | 
				
			||||||
 | 
					;CHECK: kshiftrw $15
 | 
				
			||||||
 | 
					;CHECK: kortestw
 | 
				
			||||||
 | 
					;CHECK: ret
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %cmpvector_func.i = icmp slt <8 x i64> %a, %b
 | 
				
			||||||
 | 
					  %extract24vector_func.i = extractelement <8 x i1> %cmpvector_func.i, i32 4
 | 
				
			||||||
 | 
					  %res = select i1 %extract24vector_func.i, i64 %a1, i64 %b1
 | 
				
			||||||
 | 
					  ret i64 %res
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue