ARM64: switch to IR-based atomic operations.
Goodbye code! (Game: spot the bug fixed by the change).

llvm-svn: 206490

parent 0129f298c4
commit 11a6082e33
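The patch deletes the backend's hand-rolled pseudo-instruction expansions and instead opts in to IR-level load-linked/store-conditional expansion via the shouldExpandAtomicInIR, emitLoadLinked and emitStoreConditional hooks added near the end of the diff. As a rough, hand-written sketch (not part of the commit) of the kind of IR that expansion produces for a seq_cst 32-bit atomicrmw add; the block and value names are invented, and the arm64 ldaxr/stlxr intrinsic signatures are assumed from the hooks below:

    ; A single atomic read-modify-write at the IR level:
    %old = atomicrmw add i32* %ptr, i32 %val seq_cst

    ; is expanded into an explicit exclusive-access loop, roughly:
    loop:
      ; load-exclusive with acquire semantics (seq_cst selects ldaxr)
      %lo = call i64 @llvm.arm64.ldaxr.p0i32(i32* %ptr)
      %cur = trunc i64 %lo to i32
      %new = add i32 %cur, %val
      %new64 = zext i32 %new to i64
      ; store-exclusive with release semantics; returns 0 on success
      %status = call i32 @llvm.arm64.stlxr.p0i32(i64 %new64, i32* %ptr)
      %retry = icmp ne i32 %status, 0
      br i1 %retry, label %loop, label %done
    done:
      ; %cur holds the value observed before the update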
@@ -157,9 +157,6 @@ public:
   SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
   SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
 
-  SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32,
-                       unsigned Op64);
-
   SDNode *SelectBitfieldExtractOp(SDNode *N);
   SDNode *SelectBitfieldInsertOp(SDNode *N);
 
@@ -1138,37 +1135,6 @@ SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
   return St;
 }
 
-SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
-                                        unsigned Op16, unsigned Op32,
-                                        unsigned Op64) {
-  // Mostly direct translation to the given operations, except that we preserve
-  // the AtomicOrdering for use later on.
-  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
-  EVT VT = AN->getMemoryVT();
-
-  unsigned Op;
-  if (VT == MVT::i8)
-    Op = Op8;
-  else if (VT == MVT::i16)
-    Op = Op16;
-  else if (VT == MVT::i32)
-    Op = Op32;
-  else if (VT == MVT::i64)
-    Op = Op64;
-  else
-    llvm_unreachable("Unexpected atomic operation");
-
-  SmallVector<SDValue, 4> Ops;
-  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
-    Ops.push_back(AN->getOperand(i));
-
-  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
-  Ops.push_back(AN->getOperand(0)); // Chain moves to the end
-
-  return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other,
-                              &Ops[0], Ops.size());
-}
-
 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                        unsigned &Opc, SDValue &Opd0,
                                        unsigned &LSB, unsigned &MSB,
@@ -1829,54 +1795,6 @@ SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
       return I;
     break;
 
-  case ISD::ATOMIC_LOAD_ADD:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8,
-                        ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32,
-                        ARM64::ATOMIC_LOAD_ADD_I64);
-  case ISD::ATOMIC_LOAD_SUB:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8,
-                        ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32,
-                        ARM64::ATOMIC_LOAD_SUB_I64);
-  case ISD::ATOMIC_LOAD_AND:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8,
-                        ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32,
-                        ARM64::ATOMIC_LOAD_AND_I64);
-  case ISD::ATOMIC_LOAD_OR:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8,
-                        ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32,
-                        ARM64::ATOMIC_LOAD_OR_I64);
-  case ISD::ATOMIC_LOAD_XOR:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8,
-                        ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32,
-                        ARM64::ATOMIC_LOAD_XOR_I64);
-  case ISD::ATOMIC_LOAD_NAND:
-    return SelectAtomic(
-        Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16,
-        ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64);
-  case ISD::ATOMIC_LOAD_MIN:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8,
-                        ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32,
-                        ARM64::ATOMIC_LOAD_MIN_I64);
-  case ISD::ATOMIC_LOAD_MAX:
-    return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8,
-                        ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32,
-                        ARM64::ATOMIC_LOAD_MAX_I64);
-  case ISD::ATOMIC_LOAD_UMIN:
-    return SelectAtomic(
-        Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16,
-        ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64);
-  case ISD::ATOMIC_LOAD_UMAX:
-    return SelectAtomic(
-        Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16,
-        ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64);
-  case ISD::ATOMIC_SWAP:
-    return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16,
-                        ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64);
-  case ISD::ATOMIC_CMP_SWAP:
-    return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8,
-                        ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32,
-                        ARM64::ATOMIC_CMP_SWAP_I64);
-
   case ISD::LOAD: {
     // Try to select as an indexed load. Fall through to normal processing
     // if we can't.
@@ -222,26 +222,6 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
   setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
   setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
 
-  // 128-bit atomics
-  setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom);
-  // These are surprisingly difficult. The only single-copy atomic 128-bit
-  // instruction on AArch64 is stxp (when it succeeds). So a store can safely
-  // become a simple swap, but a load can only be determined to have been atomic
-  // if storing the same value back succeeds.
-  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
-  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand);
-
   // Variable arguments.
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction(ISD::VAARG, MVT::Other, Custom);
@@ -706,437 +686,6 @@ const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
-static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
-                                  unsigned &LdrOpc, unsigned &StrOpc) {
-  static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW,
-                                  ARM64::LDXRX, ARM64::LDXPX };
-  static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW,
-                                 ARM64::LDAXRX, ARM64::LDAXPX };
-  static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW,
-                                   ARM64::STXRX, ARM64::STXPX };
-  static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW,
-                                  ARM64::STLXRX, ARM64::STLXPX };
-
-  unsigned *LoadOps, *StoreOps;
-  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
-    LoadOps = LoadAcqs;
-  else
-    LoadOps = LoadBares;
-
-  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
-    StoreOps = StoreRels;
-  else
-    StoreOps = StoreBares;
-
-  assert(isPowerOf2_32(Size) && Size <= 16 &&
-         "unsupported size for atomic binary op!");
-
-  LdrOpc = LoadOps[Log2_32(Size)];
-  StrOpc = StoreOps[Log2_32(Size)];
-}
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
-                                                          MachineBasicBlock *BB,
-                                                          unsigned Size) const {
-  unsigned dest = MI->getOperand(0).getReg();
-  unsigned ptr = MI->getOperand(1).getReg();
-  unsigned oldval = MI->getOperand(2).getReg();
-  unsigned newval = MI->getOperand(3).getReg();
-  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
-  unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister(
-      &ARM64::GPR32RegClass);
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  DebugLoc dl = MI->getDebugLoc();
-
-  // FIXME: We currently always generate a seq_cst operation; we should
-  // be able to relax this in some cases.
-  unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
-
-  MachineFunction *MF = BB->getParent();
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It; // insert the new blocks after the current block
-
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  //  thisMBB:
-  //   ...
-  //   fallthrough --> loop1MBB
-  BB->addSuccessor(loop1MBB);
-
-  // loop1MBB:
-  //   ldrex dest, [ptr]
-  //   cmp dest, oldval
-  //   bne exitMBB
-  BB = loop1MBB;
-  BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
-  BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr))
-      .addReg(Size == 8 ? ARM64::XZR : ARM64::WZR, RegState::Define)
-      .addReg(dest)
-      .addReg(oldval);
-  BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB);
-  BB->addSuccessor(loop2MBB);
-  BB->addSuccessor(exitMBB);
-
-  // loop2MBB:
-  //   strex scratch, newval, [ptr]
-  //   cmp scratch, #0
-  //   bne loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
-  BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB);
-  BB->addSuccessor(loop1MBB);
-  BB->addSuccessor(exitMBB);
-
-  //  exitMBB:
-  //   ...
-  BB = exitMBB;
-
-  MI->eraseFromParent(); // The instruction is gone now.
-
-  return BB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
-                                      unsigned Size, unsigned BinOpcode) const {
-  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction *MF = BB->getParent();
-  MachineFunction::iterator It = BB;
-  ++It;
-
-  unsigned dest = MI->getOperand(0).getReg();
-  unsigned ptr = MI->getOperand(1).getReg();
-  unsigned incr = MI->getOperand(2).getReg();
-  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
-  DebugLoc dl = MI->getDebugLoc();
-
-  unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
-
-  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, loopMBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
-  unsigned scratch2 =
-      (!BinOpcode)
-          ? incr
-          : RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass
-                                                    : &ARM64::GPR32RegClass);
-
-  //  thisMBB:
-  //   ...
-  //   fallthrough --> loopMBB
-  BB->addSuccessor(loopMBB);
-
-  //  loopMBB:
-  //   ldxr dest, ptr
-  //   <binop> scratch2, dest, incr
-  //   stxr scratch, scratch2, ptr
-  //   cbnz scratch, loopMBB
-  //   fallthrough --> exitMBB
-  BB = loopMBB;
-  BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
-  if (BinOpcode) {
-    // operand order needs to go the other way for NAND
-    if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr)
-      BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest);
-    else
-      BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr);
-  }
-
-  BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
-  BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB);
-
-  BB->addSuccessor(loopMBB);
-  BB->addSuccessor(exitMBB);
-
-  //  exitMBB:
-  //   ...
-  BB = exitMBB;
-
-  MI->eraseFromParent(); // The instruction is gone now.
-
-  return BB;
-}
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128(
-    MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo,
-    unsigned BinOpcodeHi) const {
-  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction *MF = BB->getParent();
-  MachineFunction::iterator It = BB;
-  ++It;
-
-  unsigned DestLo = MI->getOperand(0).getReg();
-  unsigned DestHi = MI->getOperand(1).getReg();
-  unsigned Ptr = MI->getOperand(2).getReg();
-  unsigned IncrLo = MI->getOperand(3).getReg();
-  unsigned IncrHi = MI->getOperand(4).getReg();
-  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
-  DebugLoc DL = MI->getDebugLoc();
-
-  unsigned LdrOpc, StrOpc;
-  getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
-  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, LoopMBB);
-  MF->insert(It, ExitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  ExitMBB->splice(ExitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
-  unsigned ScratchLo = IncrLo, ScratchHi = IncrHi;
-  if (BinOpcodeLo) {
-    assert(BinOpcodeHi && "Expect neither or both opcodes to be defined");
-    ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-    ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-  }
-
-  //  ThisMBB:
-  //   ...
-  //   fallthrough --> LoopMBB
-  BB->addSuccessor(LoopMBB);
-
-  //  LoopMBB:
-  //   ldxp DestLo, DestHi, Ptr
-  //   <binoplo> ScratchLo, DestLo, IncrLo
-  //   <binophi> ScratchHi, DestHi, IncrHi
-  //   stxp ScratchRes, ScratchLo, ScratchHi, ptr
-  //   cbnz ScratchRes, LoopMBB
-  //   fallthrough --> ExitMBB
-  BB = LoopMBB;
-  BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
-      .addReg(DestHi, RegState::Define)
-      .addReg(Ptr);
-  if (BinOpcodeLo) {
-    // operand order needs to go the other way for NAND
-    if (BinOpcodeLo == ARM64::BICXrr) {
-      std::swap(IncrLo, DestLo);
-      std::swap(IncrHi, DestHi);
-    }
-
-    BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg(
-        IncrLo);
-    BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg(
-        IncrHi);
-  }
-
-  BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
-      .addReg(ScratchLo)
-      .addReg(ScratchHi)
-      .addReg(Ptr);
-  BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
-
-  BB->addSuccessor(LoopMBB);
-  BB->addSuccessor(ExitMBB);
-
-  //  ExitMBB:
-  //   ...
-  BB = ExitMBB;
-
-  MI->eraseFromParent(); // The instruction is gone now.
-
-  return BB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI,
-                                          MachineBasicBlock *BB) const {
-  unsigned DestLo = MI->getOperand(0).getReg();
-  unsigned DestHi = MI->getOperand(1).getReg();
-  unsigned Ptr = MI->getOperand(2).getReg();
-  unsigned OldValLo = MI->getOperand(3).getReg();
-  unsigned OldValHi = MI->getOperand(4).getReg();
-  unsigned NewValLo = MI->getOperand(5).getReg();
-  unsigned NewValHi = MI->getOperand(6).getReg();
-  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(7).getImm());
-  unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister(
-      &ARM64::GPR32RegClass);
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-  DebugLoc DL = MI->getDebugLoc();
-
-  unsigned LdrOpc, StrOpc;
-  getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
-  MachineFunction *MF = BB->getParent();
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It; // insert the new blocks after the current block
-
-  MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, Loop1MBB);
-  MF->insert(It, Loop2MBB);
-  MF->insert(It, ExitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  ExitMBB->splice(ExitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  //  ThisMBB:
-  //   ...
-  //   fallthrough --> Loop1MBB
-  BB->addSuccessor(Loop1MBB);
-
-  // Loop1MBB:
-  //   ldxp DestLo, DestHi, [Ptr]
-  //   cmp DestLo, OldValLo
-  //   sbc xzr, DestHi, OldValHi
-  //   bne ExitMBB
-  BB = Loop1MBB;
-  BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
-      .addReg(DestHi, RegState::Define)
-      .addReg(Ptr);
-  BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
-      OldValLo);
-  BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
-      OldValHi);
-
-  BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB);
-  BB->addSuccessor(Loop2MBB);
-  BB->addSuccessor(ExitMBB);
-
-  // Loop2MBB:
-  //   stxp ScratchRes, NewValLo, NewValHi, [Ptr]
-  //   cbnz ScratchRes, Loop1MBB
-  BB = Loop2MBB;
-  BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
-      .addReg(NewValLo)
-      .addReg(NewValHi)
-      .addReg(Ptr);
-  BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB);
-  BB->addSuccessor(Loop1MBB);
-  BB->addSuccessor(ExitMBB);
-
-  //  ExitMBB:
-  //   ...
-  BB = ExitMBB;
-
-  MI->eraseFromParent(); // The instruction is gone now.
-
-  return BB;
-}
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128(
-    MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const {
-  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
-  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction *MF = BB->getParent();
-  MachineFunction::iterator It = BB;
-  ++It;
-
-  unsigned DestLo = MI->getOperand(0).getReg();
-  unsigned DestHi = MI->getOperand(1).getReg();
-  unsigned Ptr = MI->getOperand(2).getReg();
-  unsigned IncrLo = MI->getOperand(3).getReg();
-  unsigned IncrHi = MI->getOperand(4).getReg();
-  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
-  DebugLoc DL = MI->getDebugLoc();
-
-  unsigned LdrOpc, StrOpc;
-  getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
-  MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MF->insert(It, LoopMBB);
-  MF->insert(It, ExitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  ExitMBB->splice(ExitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
-  unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-  unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-
-  //  ThisMBB:
-  //   ...
-  //   fallthrough --> LoopMBB
-  BB->addSuccessor(LoopMBB);
-
-  //  LoopMBB:
-  //   ldxp DestLo, DestHi, Ptr
-  //   cmp ScratchLo, DestLo, IncrLo
-  //   sbc xzr, ScratchHi, DestHi, IncrHi
-  //   csel ScratchLo, DestLo, IncrLo, <cmp-op>
-  //   csel ScratchHi, DestHi, IncrHi, <cmp-op>
-  //   stxp ScratchRes, ScratchLo, ScratchHi, ptr
-  //   cbnz ScratchRes, LoopMBB
-  //   fallthrough --> ExitMBB
-  BB = LoopMBB;
-  BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
-      .addReg(DestHi, RegState::Define)
-      .addReg(Ptr);
-
-  BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
-      IncrLo);
-  BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
-      IncrHi);
-
-  BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo)
-      .addReg(DestLo)
-      .addReg(IncrLo)
-      .addImm(CondCode);
-  BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi)
-      .addReg(DestHi)
-      .addReg(IncrHi)
-      .addImm(CondCode);
-
-  BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
-      .addReg(ScratchLo)
-      .addReg(ScratchHi)
-      .addReg(Ptr);
-  BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
-
-  BB->addSuccessor(LoopMBB);
-  BB->addSuccessor(ExitMBB);
-
-  //  ExitMBB:
-  //   ...
-  BB = ExitMBB;
-
-  MI->eraseFromParent(); // The instruction is gone now.
-
-  return BB;
-}
-
 MachineBasicBlock *
 ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const {
@@ -1209,106 +758,6 @@ ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     assert(0 && "Unexpected instruction for custom inserter!");
     break;
 
-  case ARM64::ATOMIC_LOAD_ADD_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr);
-  case ARM64::ATOMIC_LOAD_ADD_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr);
-  case ARM64::ATOMIC_LOAD_ADD_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr);
-  case ARM64::ATOMIC_LOAD_ADD_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr);
-  case ARM64::ATOMIC_LOAD_ADD_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr);
-
-  case ARM64::ATOMIC_LOAD_AND_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr);
-  case ARM64::ATOMIC_LOAD_AND_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr);
-  case ARM64::ATOMIC_LOAD_AND_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr);
-  case ARM64::ATOMIC_LOAD_AND_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr);
-  case ARM64::ATOMIC_LOAD_AND_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr);
-
-  case ARM64::ATOMIC_LOAD_OR_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr);
-  case ARM64::ATOMIC_LOAD_OR_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr);
-  case ARM64::ATOMIC_LOAD_OR_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr);
-  case ARM64::ATOMIC_LOAD_OR_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr);
-  case ARM64::ATOMIC_LOAD_OR_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr);
-
-  case ARM64::ATOMIC_LOAD_XOR_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr);
-  case ARM64::ATOMIC_LOAD_XOR_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr);
-  case ARM64::ATOMIC_LOAD_XOR_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr);
-  case ARM64::ATOMIC_LOAD_XOR_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr);
-  case ARM64::ATOMIC_LOAD_XOR_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr);
-
-  case ARM64::ATOMIC_LOAD_NAND_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr);
-  case ARM64::ATOMIC_LOAD_NAND_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr);
-  case ARM64::ATOMIC_LOAD_NAND_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr);
-  case ARM64::ATOMIC_LOAD_NAND_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr);
-  case ARM64::ATOMIC_LOAD_NAND_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr);
-
-  case ARM64::ATOMIC_LOAD_SUB_I8:
-    return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr);
-  case ARM64::ATOMIC_LOAD_SUB_I16:
-    return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr);
-  case ARM64::ATOMIC_LOAD_SUB_I32:
-    return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr);
-  case ARM64::ATOMIC_LOAD_SUB_I64:
-    return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr);
-  case ARM64::ATOMIC_LOAD_SUB_I128:
-    return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr);
-
-  case ARM64::ATOMIC_LOAD_MIN_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::LT);
-
-  case ARM64::ATOMIC_LOAD_MAX_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::GT);
-
-  case ARM64::ATOMIC_LOAD_UMIN_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::CC);
-
-  case ARM64::ATOMIC_LOAD_UMAX_I128:
-    return EmitAtomicMinMax128(MI, BB, ARM64CC::HI);
-
-  case ARM64::ATOMIC_SWAP_I8:
-    return EmitAtomicBinary(MI, BB, 1, 0);
-  case ARM64::ATOMIC_SWAP_I16:
-    return EmitAtomicBinary(MI, BB, 2, 0);
-  case ARM64::ATOMIC_SWAP_I32:
-    return EmitAtomicBinary(MI, BB, 4, 0);
-  case ARM64::ATOMIC_SWAP_I64:
-    return EmitAtomicBinary(MI, BB, 8, 0);
-  case ARM64::ATOMIC_SWAP_I128:
-    return EmitAtomicBinary128(MI, BB, 0, 0);
-
-  case ARM64::ATOMIC_CMP_SWAP_I8:
-    return EmitAtomicCmpSwap(MI, BB, 1);
-  case ARM64::ATOMIC_CMP_SWAP_I16:
-    return EmitAtomicCmpSwap(MI, BB, 2);
-  case ARM64::ATOMIC_CMP_SWAP_I32:
-    return EmitAtomicCmpSwap(MI, BB, 4);
-  case ARM64::ATOMIC_CMP_SWAP_I64:
-    return EmitAtomicCmpSwap(MI, BB, 8);
-  case ARM64::ATOMIC_CMP_SWAP_I128:
-    return EmitAtomicCmpSwap128(MI, BB);
-
   case ARM64::F128CSEL:
     return EmitF128CSEL(MI, BB);
 
@@ -7476,113 +6925,12 @@ bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   return true;
 }
 
-/// The only 128-bit atomic operation is an stxp that succeeds. In particular
-/// neither ldp nor ldxp are atomic. So the canonical sequence for an atomic
-/// load is:
-///     loop:
-///         ldxp x0, x1, [x8]
-///         stxp w2, x0, x1, [x8]
-///         cbnz w2, loop
-/// If the stxp succeeds then the ldxp managed to get both halves without an
-/// intervening stxp from a different thread and the read was atomic.
-static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                   SelectionDAG &DAG) {
-  SDLoc DL(N);
-  AtomicSDNode *AN = cast<AtomicSDNode>(N);
-  EVT VT = AN->getMemoryVT();
-  SDValue Zero = DAG.getConstant(0, VT);
-
-  // FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing
-  // scheme very well. Given the complexity of what we're already generating, an
-  // extra couple of ORRs probably won't make much difference.
-  SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(),
-                                 N->getOperand(0), N->getOperand(1), Zero,
-                                 AN->getMemOperand(), AN->getOrdering(),
-                                 AN->getSynchScope());
-
-  Results.push_back(Result.getValue(0)); // Value
-  Results.push_back(Result.getValue(1)); // Chain
-}
-
-static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
-                                 SelectionDAG &DAG, unsigned NewOp) {
-  SDLoc DL(N);
-  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
-  assert(N->getValueType(0) == MVT::i128 &&
-         "Only know how to expand i128 atomics");
-
-  SmallVector<SDValue, 6> Ops;
-  Ops.push_back(N->getOperand(1)); // Ptr
-  // Low part of Val1
-  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                            N->getOperand(2), DAG.getIntPtrConstant(0)));
-  // High part of Val1
-  Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                            N->getOperand(2), DAG.getIntPtrConstant(1)));
-  if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) {
-    // Low part of Val2
-    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                              N->getOperand(3), DAG.getIntPtrConstant(0)));
-    // High part of Val2
-    Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
-                              N->getOperand(3), DAG.getIntPtrConstant(1)));
-  }
-
-  Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32));
-  Ops.push_back(N->getOperand(0)); // Chain
-
-  SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
-  SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops);
-  SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) };
-  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2));
-  Results.push_back(SDValue(Result, 2));
-}
-
 void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
                                              SmallVectorImpl<SDValue> &Results,
                                              SelectionDAG &DAG) const {
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Don't know how to custom expand this");
-  case ISD::ATOMIC_LOAD:
-    ReplaceATOMIC_LOAD_128(N, Results, DAG);
-    return;
-  case ISD::ATOMIC_LOAD_ADD:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_ADD_I128);
-    return;
-  case ISD::ATOMIC_LOAD_SUB:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128);
-    return;
-  case ISD::ATOMIC_LOAD_AND:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128);
-    return;
-  case ISD::ATOMIC_LOAD_OR:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128);
-    return;
-  case ISD::ATOMIC_LOAD_XOR:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128);
-    return;
-  case ISD::ATOMIC_LOAD_NAND:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128);
-    return;
-  case ISD::ATOMIC_SWAP:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128);
-    return;
-  case ISD::ATOMIC_LOAD_MIN:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128);
-    return;
-  case ISD::ATOMIC_LOAD_MAX:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128);
-    return;
-  case ISD::ATOMIC_LOAD_UMIN:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128);
-    return;
-  case ISD::ATOMIC_LOAD_UMAX:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128);
-    return;
-  case ISD::ATOMIC_CMP_SWAP:
-    ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128);
-    return;
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:
     assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
					@ -7590,3 +6938,85 @@ void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
 | 
				
			||||||
    return;
 | 
					    return;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					bool ARM64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
 | 
				
			||||||
 | 
					  // Loads and stores less than 128-bits are already atomic; ones above that
 | 
				
			||||||
 | 
					  // are doomed anyway, so defer to the default libcall and blame the OS when
 | 
				
			||||||
 | 
					  // things go wrong:
 | 
				
			||||||
 | 
					  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
 | 
				
			||||||
 | 
					    return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
 | 
				
			||||||
 | 
					  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
 | 
				
			||||||
 | 
					    return LI->getType()->getPrimitiveSizeInBits() == 128;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // For the real atomic operations, we have ldxr/stxr up to 128 bits.
 | 
				
			||||||
 | 
					  return Inst->getType()->getPrimitiveSizeInBits() <= 128;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
+
+Value *ARM64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+                                           AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+  bool IsAcquire =
+      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
+  // intrinsic must return {i64, i64} and we have to recombine them into a
+  // single i128 here.
+  if (ValTy->getPrimitiveSizeInBits() == 128) {
+    Intrinsic::ID Int =
+        IsAcquire ? Intrinsic::arm64_ldaxp : Intrinsic::arm64_ldxp;
+    Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
+
+    Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+    Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+    Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+    Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+    return Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
+  }
+
+  Type *Tys[] = { Addr->getType() };
+  Intrinsic::ID Int =
+      IsAcquire ? Intrinsic::arm64_ldaxr : Intrinsic::arm64_ldxr;
+  Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateTruncOrBitCast(
+      Builder.CreateCall(Ldxr, Addr),
+      cast<PointerType>(Addr->getType())->getElementType());
+}
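
A stand-alone rendering (illustrative, mirroring the builder calls above) of the IR this produces for a 128-bit acquire load:

declare { i64, i64 } @llvm.arm64.ldaxp(i8*)

define i128 @ldaxp_i128(i8* %addr) {
  %lohi = call { i64, i64 } @llvm.arm64.ldaxp(i8* %addr)   ; one ldaxp, two halves
  %lo = extractvalue { i64, i64 } %lohi, 0
  %hi = extractvalue { i64, i64 } %lohi, 1
  %lo128 = zext i64 %lo to i128
  %hi128 = zext i64 %hi to i128
  %shifted = shl i128 %hi128, 64
  %val = or i128 %lo128, %shifted                          ; recombine into a single i128
  ret i128 %val
}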
+
+Value *ARM64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
+                                                 Value *Val, Value *Addr,
+                                                 AtomicOrdering Ord) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  bool IsRelease =
+      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+  // Since the intrinsics must have legal type, the i128 intrinsics take two
+  // parameters: "i64, i64". We must marshal Val into the appropriate form
+  // before the call.
+  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
+    Intrinsic::ID Int =
+        IsRelease ? Intrinsic::arm64_stlxp : Intrinsic::arm64_stxp;
+    Function *Stxr = Intrinsic::getDeclaration(M, Int);
+    Type *Int64Ty = Type::getInt64Ty(M->getContext());
+
+    Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
+    Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+    Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+    return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
+  }
+
+  Intrinsic::ID Int =
+      IsRelease ? Intrinsic::arm64_stlxr : Intrinsic::arm64_stxr;
+  Type *Tys[] = { Addr->getType() };
+  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
+
+  return Builder.CreateCall2(
+      Stxr, Builder.CreateZExtOrBitCast(
+                Val, Stxr->getFunctionType()->getParamType(0)),
+      Addr);
+}
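
And the matching store side, again as an illustrative stand-alone version of what the code above builds for a 128-bit release store:

declare i32 @llvm.arm64.stlxp(i64, i64, i8*)

define i32 @stlxp_i128(i128 %val, i8* %addr) {
  %lo = trunc i128 %val to i64              ; marshal the i128 into two i64 parameters
  %shifted = lshr i128 %val, 64
  %hi = trunc i128 %shifted to i64
  %status = call i32 @llvm.arm64.stlxp(i64 %lo, i64 %hi, i8* %addr)
  ret i32 %status                           ; 0 on success, 1 if the exclusive failed
}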
@@ -233,19 +233,6 @@ public:
 
   SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
 
-  MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
-                                      unsigned Size, unsigned BinOpcode) const;
-  MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
-                                       unsigned Size) const;
-  MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned BinOpcodeLo,
-                                         unsigned BinOpcodeHi) const;
-  MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
-                                          MachineBasicBlock *BB) const;
-  MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
-                                         MachineBasicBlock *BB,
-                                         unsigned CondCode) const;
   MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
                                   MachineBasicBlock *BB) const;
 
@@ -293,9 +280,18 @@ public:
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
+  /// \brief Returns true if it is beneficial to convert a load of a constant
+  /// to just the constant itself.
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override;
+
+  Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+                        AtomicOrdering Ord) const override;
+  Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+                              Value *Addr, AtomicOrdering Ord) const override;
+
+  bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+
 private:
   /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
@@ -140,71 +140,6 @@ def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val),
 def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val),
           (STURXi GPR64:$val, am_unscaled64:$ptr)>;
 
-//===----------------------------------
-// Atomic read-modify-write operations
-//===----------------------------------
-
-// More complicated operations need lots of C++ support, so we just create
-// skeletons here for the C++ code to refer to.
-
-let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
-multiclass AtomicSizes {
-  def _I8 : Pseudo<(outs GPR32:$dst),
-                   (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I16 : Pseudo<(outs GPR32:$dst),
-                    (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I32 : Pseudo<(outs GPR32:$dst),
-                    (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
-  def _I64 : Pseudo<(outs GPR64:$dst),
-                    (ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
-  def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
-                     (ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
-                          i32imm:$ordering), []>;
-}
-}
-
-defm ATOMIC_LOAD_ADD  : AtomicSizes;
-defm ATOMIC_LOAD_SUB  : AtomicSizes;
-defm ATOMIC_LOAD_AND  : AtomicSizes;
-defm ATOMIC_LOAD_OR   : AtomicSizes;
-defm ATOMIC_LOAD_XOR  : AtomicSizes;
-defm ATOMIC_LOAD_NAND : AtomicSizes;
-defm ATOMIC_SWAP      : AtomicSizes;
-let Defs = [CPSR] in {
-  // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN  : AtomicSizes;
-  defm ATOMIC_LOAD_MAX  : AtomicSizes;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes;
-}
-
-class AtomicCmpSwap<RegisterClass GPRData>
-  : Pseudo<(outs GPRData:$dst),
-           (ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
-                i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [CPSR];
-}
-
-def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
-
-def ATOMIC_CMP_SWAP_I128
-  : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
-           (ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
-                GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
-  let usesCustomInserter = 1;
-  let hasCtrlDep = 1;
-  let mayLoad = 1;
-  let mayStore = 1;
-  let Defs = [CPSR];
-}
-
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
@@ -5,13 +5,14 @@
 define i128 @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) {
 ; CHECK-LABEL: val_compare_and_swap:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxp   [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x0]
-; CHECK: cmp    [[RESULTLO]], x2
-; CHECK: sbc    xzr, [[RESULTHI]], x3
-; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
-; CHECK: stxp   [[SCRATCH_RES:w[0-9]+]], x4, x5, [x0]
+; CHECK: ldaxp   [[RESULTLO:x[0-9]+]], [[RESULTHI:x[0-9]+]], [x[[ADDR:[0-9]+]]]
+; CHECK-DAG: eor     [[MISMATCH_LO:x[0-9]+]], [[RESULTLO]], x2
+; CHECK-DAG: eor     [[MISMATCH_HI:x[0-9]+]], [[RESULTHI]], x3
+; CHECK: orr [[MISMATCH:x[0-9]+]], [[MISMATCH_LO]], [[MISMATCH_HI]]
+; CHECK: cbnz    [[MISMATCH]], [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stxp   [[SCRATCH_RES:w[0-9]+]], x4, x5, [x[[ADDR]]]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
-; CHECK: [[LABEL2]]:
+; CHECK: [[DONE]]:
   %val = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire
   ret i128 %val
 }
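
The reworked CHECK lines reflect cmpxchg now being expanded in IR: the i128 equality test reaches instruction selection as a plain icmp and legalizes to the eor/eor/orr/cbnz sequence rather than the old cmp/sbc/b.ne. A sketch of the expanded loop, under the same assumptions as the earlier examples (structure and names illustrative, not from this commit):

declare { i64, i64 } @llvm.arm64.ldaxp(i8*)
declare i32 @llvm.arm64.stxp(i64, i64, i8*)

define i128 @expanded_cas(i128* %p, i128 %old, i128 %new) {
entry:
  %addr = bitcast i128* %p to i8*
  br label %loop

loop:
  %lohi = call { i64, i64 } @llvm.arm64.ldaxp(i8* %addr)
  %lo = extractvalue { i64, i64 } %lohi, 0
  %hi = extractvalue { i64, i64 } %lohi, 1
  %lo128 = zext i64 %lo to i128
  %hi128 = zext i64 %hi to i128
  %hishift = shl i128 %hi128, 64
  %loaded = or i128 %lo128, %hishift
  %equal = icmp eq i128 %loaded, %old    ; becomes the eor/eor/orr/cbnz checked above
  br i1 %equal, label %trystore, label %done

trystore:
  %newlo = trunc i128 %new to i64
  %newshift = lshr i128 %new, 64
  %newhi = trunc i128 %newshift to i64
  %status = call i32 @llvm.arm64.stxp(i64 %newlo, i64 %newhi, i8* %addr)
  %retry = icmp ne i32 %status, 0
  br i1 %retry, label %loop, label %done

done:
  ret i128 %loaded
}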
@@ -20,13 +21,13 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
 ; CHECK-LABEL: fetch_and_nand:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: bic    [[SCRATCH_REGLO:x[0-9]+]], x2, [[DEST_REGLO]]
-; CHECK: bic    [[SCRATCH_REGHI:x[0-9]+]], x3, [[DEST_REGHI]]
+; CHECK-DAG: bic    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK-DAG: bic    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw nand i128* %p, i128 %bits release
   store i128 %val, i128* @var, align 16
   ret void
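
This hunk looks like the answer to the commit message's spot-the-bug game: the bic operands swap. Since "bic a, b, c" computes b & ~c, the old expected code combined the operands as %bits & ~%loaded, while the new code computes %loaded & ~%bits. In IR terms (low word only; the wrapper functions are hypothetical, purely to make the two combines concrete):

define i64 @old_expectation(i64 %loaded, i64 %bits) {
  ; old CHECK: bic scratch, x2, loaded  =>  %bits & ~%loaded
  %not_loaded = xor i64 %loaded, -1
  %r = and i64 %bits, %not_loaded
  ret i64 %r
}

define i64 @new_expectation(i64 %loaded, i64 %bits) {
  ; new CHECK: bic scratch, loaded, x2  =>  %loaded & ~%bits
  %not_bits = xor i64 %bits, -1
  %r = and i64 %loaded, %not_bits
  ret i64 %r
}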
@@ -36,13 +37,13 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
 ; CHECK-LABEL: fetch_and_or:
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
-; CHECK: orr    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: orr    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK-DAG: orr    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
+; CHECK-DAG: orr    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw or i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -53,12 +54,12 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
 ; CHECK: adds   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: adc    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK: adcs   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw add i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -69,12 +70,12 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
 ; CHECK: subs   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2
-; CHECK: sbc    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
+; CHECK: sbcs    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw sub i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -85,14 +86,18 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp   [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
 ; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: sbc    xzr, [[DEST_REGHI]], x3
-; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, lt
-; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, lt
+; CHECK: csinc   [[LOCMP:w[0-9]+]], wzr, wzr, hi
+; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: csinc   [[HICMP:w[0-9]+]], wzr, wzr, gt
+; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp     [[CMP]], #0
+; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw min i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
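
The min/max tests now expect a two-stage comparison: with no pseudo-instruction doing the work, the i128 signed compare is legalized as an unsigned compare of the low words, a signed compare of the high words, and a select keyed on whether the high words are equal. Schematically, as an illustrative assumption about the legalization:

define i1 @sle_i128_split(i64 %alo, i64 %ahi, i64 %blo, i64 %bhi) {
  %lo_le = icmp ule i64 %alo, %blo               ; low words: unsigned (the csinc ..., hi)
  %hi_le = icmp sle i64 %ahi, %bhi               ; high words: signed (the csinc ..., gt)
  %hi_eq = icmp eq i64 %ahi, %bhi
  %le = select i1 %hi_eq, i1 %lo_le, i1 %hi_le   ; the csel ..., eq
  ret i1 %le
}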
@@ -103,14 +108,18 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
 ; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: sbc    xzr, [[DEST_REGHI]], x3
-; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, gt
-; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, gt
+; CHECK: csinc   [[LOCMP:w[0-9]+]], wzr, wzr, ls
+; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: csinc   [[HICMP:w[0-9]+]], wzr, wzr, le
+; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp     [[CMP]], #0
+; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw max i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -121,14 +130,18 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
 ; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: sbc    xzr, [[DEST_REGHI]], x3
-; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, cc
-; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, cc
+; CHECK: csinc   [[LOCMP:w[0-9]+]], wzr, wzr, hi
+; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: csinc   [[HICMP:w[0-9]+]], wzr, wzr, hi
+; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp     [[CMP]], #0
+; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw umin i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -139,14 +152,18 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxp  [[DEST_REGLO:x[0-9]+]], [[DEST_REGHI:x[0-9]+]], [x0]
 ; CHECK: cmp     [[DEST_REGLO]], x2
-; CHECK: sbc    xzr, [[DEST_REGHI]], x3
-; CHECK: csel   [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, hi
-; CHECK: csel   [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, hi
+; CHECK: csinc   [[LOCMP:w[0-9]+]], wzr, wzr, ls
+; CHECK: cmp     [[DEST_REGHI:x[0-9]+]], x3
+; CHECK: csinc   [[HICMP:w[0-9]+]], wzr, wzr, ls
+; CHECK: csel    [[CMP:w[0-9]+]], [[LOCMP]], [[HICMP]], eq
+; CHECK: cmp     [[CMP]], #0
+; CHECK-DAG: csel    [[SCRATCH_REGHI:x[0-9]+]], [[DEST_REGHI]], x3, ne
+; CHECK-DAG: csel    [[SCRATCH_REGLO:x[0-9]+]], [[DEST_REGLO]], x2, ne
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK: str    [[DEST_REGHI]]
-; CHECK: str    [[DEST_REGLO]]
+; CHECK-DAG: str    [[DEST_REGHI]]
+; CHECK-DAG: str    [[DEST_REGLO]]
   %val = atomicrmw umax i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -164,12 +181,7 @@ define i128 @atomic_load_seq_cst(i128* %p) {
 define i128 @atomic_load_relaxed(i128* %p) {
 ; CHECK-LABEL: atomic_load_relaxed:
 ; CHECK-NOT: dmb
-; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
-; CHECK: orr [[SAMELO:x[0-9]+]], [[LO]], xzr
-; CHECK: orr [[SAMEHI:x[0-9]+]], [[HI]], xzr
-; CHECK: stxp [[SUCCESS:w[0-9]+]], [[SAMELO]], [[SAMEHI]], [x0]
-; CHECK: cbnz [[SUCCESS]], [[LABEL]]
 ; CHECK-NOT: dmb
    %r = load atomic i128* %p monotonic, align 16
    ret i128 %r
@@ -3,10 +3,9 @@
 define i32 @val_compare_and_swap(i32* %p) {
 ; CHECK-LABEL: val_compare_and_swap:
 ; CHECK: orr    [[NEWVAL_REG:w[0-9]+]], wzr, #0x4
-; CHECK: orr    [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldaxr   [[RESULT:w[0-9]+]], [x0]
-; CHECK: cmp    [[RESULT]], [[OLDVAL_REG]]
+; CHECK: cmp    [[RESULT]], #7
 ; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
 ; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[NEWVAL_REG]], [x0]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
@@ -18,10 +17,9 @@ define i32 @val_compare_and_swap(i32* %p) {
 define i64 @val_compare_and_swap_64(i64* %p) {
 ; CHECK-LABEL: val_compare_and_swap_64:
 ; CHECK: orr    w[[NEWVAL_REG:[0-9]+]], wzr, #0x4
-; CHECK: orr    w[[OLDVAL_REG:[0-9]+]], wzr, #0x7
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldxr   [[RESULT:x[0-9]+]], [x0]
-; CHECK: cmp    [[RESULT]], x[[OLDVAL_REG]]
+; CHECK: cmp    [[RESULT]], #7
 ; CHECK: b.ne   [[LABEL2:.?LBB[0-9]+_[0-9]+]]
 ; CHECK-NOT: stxr x[[NEWVAL_REG]], x[[NEWVAL_REG]]
 ; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], x[[NEWVAL_REG]], [x0]
@@ -33,10 +31,9 @@ define i64 @val_compare_and_swap_64(i64* %p) {
 
 define i32 @fetch_and_nand(i32* %p) {
 ; CHECK-LABEL: fetch_and_nand:
-; CHECK: orr    [[OLDVAL_REG:w[0-9]+]], wzr, #0x7
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
 ; CHECK: ldxr   w[[DEST_REG:[0-9]+]], [x0]
-; CHECK: bic    [[SCRATCH2_REG:w[0-9]+]], [[OLDVAL_REG]], w[[DEST_REG]]
+; CHECK: and    [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], #0xfffffff8
 ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
 ; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
@@ -47,13 +44,13 @@ define i32 @fetch_and_nand(i32* %p) {
 
 define i64 @fetch_and_nand_64(i64* %p) {
 ; CHECK-LABEL: fetch_and_nand_64:
-; CHECK: orr    w[[OLDVAL_REG:[0-9]+]], wzr, #0x7
+; CHECK: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldaxr   [[DEST_REG:x[0-9]+]], [x0]
-; CHECK: bic    [[SCRATCH2_REG:x[0-9]+]], x[[OLDVAL_REG]], [[DEST_REG]]
-; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
+; CHECK: ldaxr   [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
+; CHECK: and    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0xfffffffffffffff8
+; CHECK: stlxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov    x0, [[DEST_REG]]
+
   %val = atomicrmw nand i64* %p, i64 7 acq_rel
   ret i64 %val
 }
@@ -74,13 +71,12 @@ define i32 @fetch_and_or(i32* %p) {
 
 define i64 @fetch_and_or_64(i64* %p) {
 ; CHECK: fetch_and_or_64:
-; CHECK: orr    w[[OLDVAL_REG:[0-9]+]], wzr, #0x7
+; CHECK: mov    x[[ADDR:[0-9]+]], x0
 ; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
-; CHECK: ldxr   [[DEST_REG:x[0-9]+]], [x0]
-; CHECK: orr    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], x[[OLDVAL_REG]]
-; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
+; CHECK: ldxr   [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
+; CHECK: orr    [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
+; CHECK: stxr   [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
 ; CHECK: cbnz   [[SCRATCH_REG]], [[LABEL]]
-; CHECK: mov    x0, [[DEST_REG]]
   %val = atomicrmw or i64* %p, i64 7 monotonic
   ret i64 %val
 }