[LLVM][x86][Inline Asm] support for GCC style inline asm - Y<x> constraints
This patch enables the basic double-letter constraints used in GCC extended inline asm: Yi, Y2, Yz, Y0, Ym and Yt.
Supersedes D35204
Clang counterpart: D36371
Differential Revision: https://reviews.llvm.org/D36369
llvm-svn: 311644
			
			
This commit is contained in:
		
							parent
							
								
									7b49dc9c68
								
							
						
					
					
						commit
						21c312d8c6
					
				| 
						 | 
					@ -36057,8 +36057,8 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
 | 
				
			||||||
    case 'v':
 | 
					    case 'v':
 | 
				
			||||||
    case 'Y':
 | 
					    case 'Y':
 | 
				
			||||||
    case 'l':
 | 
					    case 'l':
 | 
				
			||||||
      return C_RegisterClass;
 | 
					 | 
				
			||||||
    case 'k': // AVX512 masking registers.
 | 
					    case 'k': // AVX512 masking registers.
 | 
				
			||||||
 | 
					      return C_RegisterClass;
 | 
				
			||||||
    case 'a':
 | 
					    case 'a':
 | 
				
			||||||
    case 'b':
 | 
					    case 'b':
 | 
				
			||||||
    case 'c':
 | 
					    case 'c':
 | 
				
			||||||
| 
						 | 
					@ -36090,8 +36090,15 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
 | 
				
			||||||
      switch (Constraint[1]) {
 | 
					      switch (Constraint[1]) {
 | 
				
			||||||
      default:
 | 
					      default:
 | 
				
			||||||
        break;
 | 
					        break;
 | 
				
			||||||
      case 'k':
 | 
					      case 'z':
 | 
				
			||||||
 | 
					      case '0':
 | 
				
			||||||
        return C_Register;
 | 
					        return C_Register;
 | 
				
			||||||
 | 
					      case 'i':
 | 
				
			||||||
 | 
					      case 'm':
 | 
				
			||||||
 | 
					      case 'k':
 | 
				
			||||||
 | 
					      case 't':
 | 
				
			||||||
 | 
					      case '2':
 | 
				
			||||||
 | 
					        return C_RegisterClass;
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
| 
						 | 
					@ -36139,15 +36146,42 @@ TargetLowering::ConstraintWeight
 | 
				
			||||||
    if (type->isX86_MMXTy() && Subtarget.hasMMX())
 | 
					    if (type->isX86_MMXTy() && Subtarget.hasMMX())
 | 
				
			||||||
      weight = CW_SpecificReg;
 | 
					      weight = CW_SpecificReg;
 | 
				
			||||||
    break;
 | 
					    break;
 | 
				
			||||||
  case 'Y':
 | 
					  case 'Y': {
 | 
				
			||||||
    // Other "Y<x>" (e.g. "Yk") constraints should be implemented below.
 | 
					    unsigned Size = StringRef(constraint).size();
 | 
				
			||||||
    if (constraint[1] == 'k') {
 | 
					    // Pick 'i' as the next char as 'Yi' and 'Y' are synonymous, when matching 'Y'
 | 
				
			||||||
      // Support for 'Yk' (similarly to the 'k' variant below).
 | 
					    char NextChar = Size == 2 ? constraint[1] : 'i';
 | 
				
			||||||
      weight = CW_SpecificReg;
 | 
					    if (Size > 2)
 | 
				
			||||||
      break;
 | 
					      break;
 | 
				
			||||||
 | 
					    switch (NextChar) {
 | 
				
			||||||
 | 
					      default:
 | 
				
			||||||
 | 
					        return CW_Invalid;
 | 
				
			||||||
 | 
					      // XMM0
 | 
				
			||||||
 | 
					      case 'z':
 | 
				
			||||||
 | 
					      case '0':
 | 
				
			||||||
 | 
					        if ((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1())
 | 
				
			||||||
 | 
					          return CW_SpecificReg;
 | 
				
			||||||
 | 
					        return CW_Invalid;
 | 
				
			||||||
 | 
					      // Conditional OpMask regs (AVX512)
 | 
				
			||||||
 | 
					      case 'k':
 | 
				
			||||||
 | 
					        if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
 | 
				
			||||||
 | 
					          return CW_Register;
 | 
				
			||||||
 | 
					        return CW_Invalid;
 | 
				
			||||||
 | 
					      // Any MMX reg
 | 
				
			||||||
 | 
					      case 'm':
 | 
				
			||||||
 | 
					        if (type->isX86_MMXTy() && Subtarget.hasMMX())
 | 
				
			||||||
 | 
					          return weight;
 | 
				
			||||||
 | 
					        return CW_Invalid;
 | 
				
			||||||
 | 
					      // Any SSE reg when ISA >= SSE2, same as 'Y'
 | 
				
			||||||
 | 
					      case 'i':
 | 
				
			||||||
 | 
					      case 't':
 | 
				
			||||||
 | 
					      case '2':
 | 
				
			||||||
 | 
					        if (!Subtarget.hasSSE2())
 | 
				
			||||||
 | 
					          return CW_Invalid;
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  // Else fall through (handle "Y" constraint).
 | 
					    // Fall through (handle "Y" constraint).
 | 
				
			||||||
    LLVM_FALLTHROUGH;
 | 
					    LLVM_FALLTHROUGH;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
  case 'v':
 | 
					  case 'v':
 | 
				
			||||||
    if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
 | 
					    if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
 | 
				
			||||||
      weight = CW_Register;
 | 
					      weight = CW_Register;
 | 
				
			||||||
| 
						 | 
					@ -36159,7 +36193,8 @@ TargetLowering::ConstraintWeight
 | 
				
			||||||
    break;
 | 
					    break;
 | 
				
			||||||
  case 'k':
 | 
					  case 'k':
 | 
				
			||||||
    // Enable conditional vector operations using %k<#> registers.
 | 
					    // Enable conditional vector operations using %k<#> registers.
 | 
				
			||||||
    weight = CW_SpecificReg;
 | 
					    if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
 | 
				
			||||||
 | 
					      weight = CW_Register;
 | 
				
			||||||
    break;
 | 
					    break;
 | 
				
			||||||
  case 'I':
 | 
					  case 'I':
 | 
				
			||||||
    if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
 | 
					    if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
 | 
				
			||||||
| 
						 | 
					@ -36561,6 +36596,17 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
 | 
				
			||||||
    switch (Constraint[1]) {
 | 
					    switch (Constraint[1]) {
 | 
				
			||||||
    default:
 | 
					    default:
 | 
				
			||||||
      break;
 | 
					      break;
 | 
				
			||||||
 | 
					    case 'i':
 | 
				
			||||||
 | 
					    case 't':
 | 
				
			||||||
 | 
					    case '2':
 | 
				
			||||||
 | 
					      return getRegForInlineAsmConstraint(TRI, "Y", VT);
 | 
				
			||||||
 | 
					    case 'm':
 | 
				
			||||||
 | 
					      if (!Subtarget.hasMMX()) break;
 | 
				
			||||||
 | 
					      return std::make_pair(0U, &X86::VR64RegClass);
 | 
				
			||||||
 | 
					    case 'z':
 | 
				
			||||||
 | 
					    case '0':
 | 
				
			||||||
 | 
					      if (!Subtarget.hasSSE1()) break;
 | 
				
			||||||
 | 
					      return std::make_pair(X86::XMM0, &X86::VR128RegClass);
 | 
				
			||||||
    case 'k':
 | 
					    case 'k':
 | 
				
			||||||
      // This register class doesn't allocate k0 for masked vector operation.
 | 
					      // This register class doesn't allocate k0 for masked vector operation.
 | 
				
			||||||
      if (Subtarget.hasAVX512()) { // Only supported in AVX512.
 | 
					      if (Subtarget.hasAVX512()) { // Only supported in AVX512.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,83 @@
 | 
				
			||||||
 | 
					; RUN: llc -mtriple=x86_64-apple-darwin -mcpu skx < %s | FileCheck %s
 | 
				
			||||||
 | 
					; This test complements the .c test under clang/test/CodeGen/. We check
 | 
				
			||||||
 | 
					; if the inline asm constraints are respected in the generated code.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Function Attrs: nounwind
 | 
				
			||||||
 | 
					define void @f_Ym(i64 %m.coerce) {
 | 
				
			||||||
 | 
					; Any MMX register constraint
 | 
				
			||||||
 | 
					; CHECK-LABEL: f_Ym:
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm Start
 | 
				
			||||||
 | 
					; CHECK-NEXT:    movq %mm{{[0-9]+}}, %mm1
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm End
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = tail call x86_mmx asm sideeffect "movq $0, %mm1\0A\09", "=^Ym,~{dirflag},~{fpsr},~{flags}"() 
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Function Attrs: nounwind
 | 
				
			||||||
 | 
					define void @f_Yi(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
 | 
				
			||||||
 | 
					; Any SSE register when SSE2 is enabled (GCC when inter-unit moves enabled)
 | 
				
			||||||
 | 
					; CHECK-LABEL: f_Yi:
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm Start
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm End
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yi,^Yi,^Yi,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z) 
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Function Attrs: nounwind
 | 
				
			||||||
 | 
					define void @f_Yt(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
 | 
				
			||||||
 | 
					; Any SSE register when SSE2 is enabled
 | 
				
			||||||
 | 
					; CHECK-LABEL: f_Yt:
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm Start
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm End
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Yt,^Yt,^Yt,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Function Attrs: nounwind
 | 
				
			||||||
 | 
					define void @f_Y2(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
 | 
				
			||||||
 | 
					; Any SSE register when SSE2 is enabled
 | 
				
			||||||
 | 
					; CHECK-LABEL: f_Y2:
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm Start
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm End
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = tail call <4 x float> asm sideeffect "vpaddq $0, $1, $2\0A\09", "=^Y2,^Y2,^Y2,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Function Attrs: nounwind
 | 
				
			||||||
 | 
					define void @f_Yz(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
 | 
				
			||||||
 | 
					; xmm0 SSE register(GCC)
 | 
				
			||||||
 | 
					; CHECK-LABEL: f_Yz:
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm Start
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm End
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Yz,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Function Attrs: nounwind
 | 
				
			||||||
 | 
					define void @f_Y0(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
 | 
				
			||||||
 | 
					; xmm0 SSE register
 | 
				
			||||||
 | 
					; CHECK-LABEL: f_Y0:
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm Start
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm0
 | 
				
			||||||
 | 
					; CHECK-NEXT:    vpaddq %xmm0, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
 | 
				
			||||||
 | 
					; CHECK:         ## InlineAsm End
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = tail call { <4 x float>, <4 x float> } asm sideeffect "vpaddq $0,$2,$1\0A\09vpaddq $1,$0,$2\0A\09", "=^Yi,=^Y0,^Yi,0,~{dirflag},~{fpsr},~{flags}"(<4 x float> %y, <4 x float> %z)
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue