[SLP]Improve/fix reordering of the gathered graph nodes.

Gathered loads/extractelements/extractvalues should also be checked for whether they can represent a vector reordering node, and their order should be taken into account for better graph reordering analysis. Also, if a gather node has reused scalars, its reuse shuffle indices must be reordered rather than the scalars themselves.

Differential Revision: https://reviews.llvm.org/D112454
commit 07ef9f513f
parent 4d2765e994
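The central idea of the patch, as a minimal standalone C++ sketch (not LLVM code; the function name findReusedOrder and the int stand-ins for scalars are invented for illustration, and unlike the real findReusedOrderedScalars below it rejects partial matches): if every scalar of a gather node is already an element of one single vectorized node, the gather can be emitted as a shuffle of that node, and the lane mapping doubles as an order the reordering analysis can propagate.

#include <algorithm>
#include <cstdio>
#include <optional>
#include <vector>

// Returns Order such that Order[lane-of-vectorized-node] = gather-position,
// or nullopt when the gathered values do not all live in the source node.
std::optional<std::vector<unsigned>>
findReusedOrder(const std::vector<int> &Gathered,
                const std::vector<int> &Vectorized) {
  unsigned NumScalars = Gathered.size();
  std::vector<unsigned> Order(NumScalars, NumScalars); // NumScalars = unset.
  for (unsigned I = 0; I < NumScalars; ++I) {
    auto It = std::find(Vectorized.begin(), Vectorized.end(), Gathered[I]);
    if (It == Vectorized.end())
      return std::nullopt; // Value is not an element of the vectorized node.
    unsigned Lane = It - Vectorized.begin();
    if (Lane >= NumScalars || Order[Lane] != NumScalars)
      return std::nullopt; // Lane out of range or claimed twice.
    Order[Lane] = I;
  }
  return Order;
}

int main() {
  // The gather {30, 10, 20} only reuses lanes of the node {10, 20, 30}, so it
  // can become a shuffle with order {1, 2, 0} instead of a scalar rebuild.
  if (auto O = findReusedOrder({30, 10, 20}, {10, 20, 30}))
    for (unsigned Idx : *O)
      std::printf("%u ", Idx); // Prints: 1 2 0
  return 0;
}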
@@ -766,6 +766,12 @@ public:
   /// Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
 
+  /// Checks if the specified gather tree entry \p TE can be represented as a
+  /// shuffled vector entry + (possibly) permutation with other gathers. It
+  /// implements the checks only for possibly ordered scalars (Loads,
+  /// ExtractElement, ExtractValue), which can be part of the graph.
+  Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
+
   /// Reorders the current graph to the most profitable order starting from the
   /// root node to the leaf nodes. The best order is chosen only from the nodes
   /// of the same size (vectorization factor). Smaller nodes are considered
@@ -2670,6 +2676,72 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
   fixupOrderingIndices(Order);
 }
 
+Optional<BoUpSLP::OrdersType>
+BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
+  assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
+  unsigned NumScalars = TE.Scalars.size();
+  OrdersType CurrentOrder(NumScalars, NumScalars);
+  SmallVector<int> Positions;
+  SmallBitVector UsedPositions(NumScalars);
+  const TreeEntry *STE = nullptr;
+  // Try to find all gathered scalars that are vectorized in another vector
+  // node. Only a single tree vector node is allowed here, so that the order
+  // of the gathered scalars can be identified unambiguously.
+  for (unsigned I = 0; I < NumScalars; ++I) {
+    Value *V = TE.Scalars[I];
+    if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
+      continue;
+    if (const auto *LocalSTE = getTreeEntry(V)) {
+      if (!STE)
+        STE = LocalSTE;
+      else if (STE != LocalSTE)
+        // Take the order only from the single vector node.
+        return None;
+      unsigned Lane =
+          std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
+      if (Lane >= NumScalars)
+        return None;
+      if (CurrentOrder[Lane] != NumScalars) {
+        if (Lane != I)
+          continue;
+        UsedPositions.reset(CurrentOrder[Lane]);
+      }
+      // The partial identity (where only some elements of the gather node are
+      // in the identity order) is good.
+      CurrentOrder[Lane] = I;
+      UsedPositions.set(I);
+    }
+  }
+  // Need to keep the order if we have a vector entry and at least 2 scalars or
+  // the vectorized entry has just 2 scalars.
+  if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
+    auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
+      for (unsigned I = 0; I < NumScalars; ++I)
+        if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
+          return false;
+      return true;
+    };
+    if (IsIdentityOrder(CurrentOrder)) {
+      CurrentOrder.clear();
+      return CurrentOrder;
+    }
+    auto *It = CurrentOrder.begin();
+    for (unsigned I = 0; I < NumScalars;) {
+      if (UsedPositions.test(I)) {
+        ++I;
+        continue;
+      }
+      if (*It == NumScalars) {
+        *It = I;
+        ++I;
+      }
+      ++It;
+    }
+    return CurrentOrder;
+  }
+  return None;
+}
+
 void BoUpSLP::reorderTopToBottom() {
   // Maps VF to the graph nodes.
   DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
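A worked illustration of the final fixup loop in findReusedOrderedScalars above, as a self-contained sketch (assumed semantics; a plain bool vector stands in for SmallBitVector and the values are hand-picked): lanes that matched no scalar keep the sentinel value NumScalars, and the loop hands them the gather positions that no matched scalar claimed, completing the partial identity into a full permutation.

#include <cstdio>
#include <vector>

int main() {
  const unsigned NumScalars = 4;
  // Lanes 0 and 2 of the source vector matched gathered elements 2 and 0;
  // lanes 1 and 3 found no match and keep the sentinel value NumScalars.
  std::vector<unsigned> Order = {2, NumScalars, 0, NumScalars};
  // Gathered indices 0 and 2 already appear in Order; 1 and 3 are free.
  std::vector<bool> UsedPositions = {true, false, true, false};
  auto It = Order.begin();
  for (unsigned I = 0; I < NumScalars;) {
    if (UsedPositions[I]) { // Index I is already placed somewhere in Order.
      ++I;
      continue;
    }
    if (*It == NumScalars) { // Sentinel slot: hand it the free index I.
      *It = I;
      ++I;
    }
    ++It;
  }
  for (unsigned V : Order)
    std::printf("%u ", V); // Prints: 2 1 0 3
  return 0;
}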
@@ -2689,19 +2761,29 @@ void BoUpSLP::reorderTopToBottom() {
                 InsertElementInst>(TE->getMainOp()) &&
         !TE->isAltShuffle()) {
       VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
-    } else if (TE->State == TreeEntry::NeedToGather &&
-               TE->getOpcode() == Instruction::ExtractElement &&
-               !TE->isAltShuffle() &&
-               isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
-                                        ->getVectorOperandType()) &&
-               allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
-      // Check that gather of extractelements can be represented as
-      // just a shuffle of a single vector.
-      OrdersType CurrentOrder;
-      bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
-      if (Reuse || !CurrentOrder.empty()) {
+      return;
+    }
+    if (TE->State == TreeEntry::NeedToGather) {
+      if (TE->getOpcode() == Instruction::ExtractElement &&
+          !TE->isAltShuffle() &&
+          isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+                                   ->getVectorOperandType()) &&
+          allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+        // Check that gather of extractelements can be represented as
+        // just a shuffle of a single vector.
+        OrdersType CurrentOrder;
+        bool Reuse =
+            canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+        if (Reuse || !CurrentOrder.empty()) {
+          VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+          GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+          return;
+        }
+      }
+      if (Optional<OrdersType> CurrentOrder =
+              findReusedOrderedScalars(*TE.get())) {
         VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
-      GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+        GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
       }
     }
   });
@@ -2718,7 +2800,9 @@ void BoUpSLP::reorderTopToBottom() {
     const SmallPtrSetImpl<TreeEntry *> &OrderedEntries = It->getSecond();
     // All operands are reordered and used only in this node - propagate the
     // most used order to the user node.
-    DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> OrdersUses;
+    MapVector<OrdersType, unsigned,
+              DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
+        OrdersUses;
     SmallPtrSet<const TreeEntry *, 4> VisitedOps;
     for (const TreeEntry *OpTE : OrderedEntries) {
       // No need to reorder this nodes, still need to extend and to use shuffle,
@@ -2742,18 +2826,18 @@ void BoUpSLP::reorderTopToBottom() {
           return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
         });
         fixupOrderingIndices(CurrentOrder);
-        ++OrdersUses.try_emplace(CurrentOrder).first->getSecond();
+        ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
       } else {
-        ++OrdersUses.try_emplace(Order).first->getSecond();
+        ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
       }
     }
     // Set order of the user node.
     if (OrdersUses.empty())
       continue;
     // Choose the most used order.
-    ArrayRef<unsigned> BestOrder = OrdersUses.begin()->first;
-    unsigned Cnt = OrdersUses.begin()->second;
-    for (const auto &Pair : llvm::drop_begin(OrdersUses)) {
+    ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
+    unsigned Cnt = OrdersUses.front().second;
+    for (const auto &Pair : drop_begin(OrdersUses)) {
       if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
         BestOrder = Pair.first;
         Cnt = Pair.second;
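A note on the container swap in the hunks above (repeated in reorderBottomToTop further down): this reads as a determinism fix, though that rationale is an inference, not stated in the commit. Counting order uses in a DenseMap iterates in hash order, so ties for the most used order could be broken differently from run to run; a MapVector keeps a vector alongside the map and iterates in insertion order, making the BestOrder scan reproducible. A rough standalone analogue of the pattern:

#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

// Hash map for lookup, vector for deterministic insertion-order iteration.
struct OrderCounter {
  std::unordered_map<std::string, size_t> Index; // key -> slot in Entries.
  std::vector<std::pair<std::string, unsigned>> Entries;

  unsigned &operator[](const std::string &Key) {
    auto It = Index.find(Key);
    if (It == Index.end()) {
      Index.emplace(Key, Entries.size());
      Entries.emplace_back(Key, 0);
      return Entries.back().second;
    }
    return Entries[It->second].second;
  }
};

int main() {
  OrderCounter Uses;
  ++Uses["0,1,2,3"]; // identity order, seen once
  ++Uses["1,0,3,2"];
  ++Uses["1,0,3,2"];
  // The scan below always visits entries in insertion order.
  auto Best = Uses.Entries.front();
  for (auto &P : Uses.Entries)
    if (P.second > Best.second)
      Best = P;
  std::printf("best order: %s (%u uses)\n", Best.first.c_str(), Best.second);
  return 0;
}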
@@ -2830,6 +2914,8 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
   for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
                               &NonVectorized](
                                  const std::unique_ptr<TreeEntry> &TE) {
+    if (TE->State != TreeEntry::Vectorize)
+      NonVectorized.push_back(TE.get());
     // No need to reorder if need to shuffle reuses, still need to shuffle the
     // node.
     if (!TE->ReuseShuffleIndices.empty())
@@ -2838,28 +2924,37 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
     isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
         !TE->isAltShuffle()) {
       OrderedEntries.insert(TE.get());
-    } else if (TE->State == TreeEntry::NeedToGather &&
-               TE->getOpcode() == Instruction::ExtractElement &&
-               !TE->isAltShuffle() &&
-               isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
-                                        ->getVectorOperandType()) &&
-               allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
-      // Check that gather of extractelements can be represented as
-      // just a shuffle of a single vector with a single user only.
-      OrdersType CurrentOrder;
-      bool Reuse = canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
-      if ((Reuse || !CurrentOrder.empty()) &&
-          !any_of(
-              VectorizableTree, [&TE](const std::unique_ptr<TreeEntry> &Entry) {
-                return Entry->State == TreeEntry::NeedToGather &&
-                       Entry.get() != TE.get() && Entry->isSame(TE->Scalars);
-              })) {
+      return;
+    }
+    if (TE->State == TreeEntry::NeedToGather) {
+      if (TE->getOpcode() == Instruction::ExtractElement &&
+          !TE->isAltShuffle() &&
+          isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+                                   ->getVectorOperandType()) &&
+          allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+        // Check that gather of extractelements can be represented as
+        // just a shuffle of a single vector with a single user only.
+        OrdersType CurrentOrder;
+        bool Reuse =
+            canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+        if ((Reuse || !CurrentOrder.empty()) &&
+            !any_of(VectorizableTree,
+                    [&TE](const std::unique_ptr<TreeEntry> &Entry) {
+                      return Entry->State == TreeEntry::NeedToGather &&
+                             Entry.get() != TE.get() &&
+                             Entry->isSame(TE->Scalars);
+                    })) {
+          OrderedEntries.insert(TE.get());
+          GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+          return;
+        }
+      }
+      if (Optional<OrdersType> CurrentOrder =
+              findReusedOrderedScalars(*TE.get())) {
         OrderedEntries.insert(TE.get());
-      GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+        GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
       }
     }
-    if (TE->State != TreeEntry::Vectorize)
-      NonVectorized.push_back(TE.get());
   });
@@ -2911,7 +3006,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
     for (TreeEntry *TE : OrderedEntries) {
       if (!(TE->State == TreeEntry::Vectorize ||
             (TE->State == TreeEntry::NeedToGather &&
-             TE->getOpcode() == Instruction::ExtractElement)) ||
+             GathersToOrders.count(TE))) ||
           TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
           !all_of(drop_begin(TE->UserTreeIndices),
                   [TE](const EdgeInfo &EI) {
@@ -2946,7 +3041,9 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
       }
       // All operands are reordered and used only in this node - propagate the
       // most used order to the user node.
-      DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> OrdersUses;
+      MapVector<OrdersType, unsigned,
+                DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
+          OrdersUses;
       SmallPtrSet<const TreeEntry *, 4> VisitedOps;
       for (const auto &Op : Data.second) {
         TreeEntry *OpTE = Op.second;
@@ -2969,13 +3066,14 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
           return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
         });
         fixupOrderingIndices(CurrentOrder);
-        ++OrdersUses.try_emplace(CurrentOrder).first->getSecond();
+        ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
       } else {
-        ++OrdersUses.try_emplace(Order).first->getSecond();
+        ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
       }
       if (VisitedOps.insert(OpTE).second)
-        OrdersUses.try_emplace({}, 0).first->getSecond() +=
+        OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
             OpTE->UserTreeIndices.size();
+      assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
       --OrdersUses[{}];
     }
     // If no orders - skip current nodes and jump to the next one, if any.
@@ -2987,9 +3085,9 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
         continue;
       }
       // Choose the best order.
-      ArrayRef<unsigned> BestOrder = OrdersUses.begin()->first;
-      unsigned Cnt = OrdersUses.begin()->second;
-      for (const auto &Pair : llvm::drop_begin(OrdersUses)) {
+      ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
+      unsigned Cnt = OrdersUses.front().second;
+      for (const auto &Pair : drop_begin(OrdersUses)) {
         if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
           BestOrder = Pair.first;
           Cnt = Pair.second;
@@ -3032,10 +3130,13 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
       }
      // For gathers just need to reorder its scalars.
      for (TreeEntry *Gather : GatherOps) {
-        if (!Gather->ReuseShuffleIndices.empty())
-          continue;
        assert(Gather->ReorderIndices.empty() &&
               "Unexpected reordering of gathers.");
+        if (!Gather->ReuseShuffleIndices.empty()) {
+          // Just reorder reuses indices.
+          reorderReuses(Gather->ReuseShuffleIndices, Mask);
+          continue;
+        }
        reorderScalars(Gather->Scalars, Mask);
        OrderedEntries.remove(Gather);
      }
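The hunk above implements the second half of the commit message: a gather node that carries a reuse mask is materialized through ReuseShuffleIndices, so applying the new order to that mask is equivalent to reordering the emitted vector while leaving Scalars untouched. A minimal sketch of the assumed remapping semantics (invented standalone code; the real reorderReuses operates on SmallVectorImpl<int> and may differ in detail):

#include <cstdio>
#include <vector>

// Convention in this sketch: applying mask M to vector V yields W with
// W[i] = V[M[i]]. Remapping each reuse index through the scalar permutation
// then makes gathering from the original scalars with the new mask equal to
// gathering from the reordered scalars with the old mask.
void reorderReuses(std::vector<int> &Reuses, const std::vector<int> &Mask) {
  for (int &Idx : Reuses)
    Idx = Mask[Idx];
}

int main() {
  // Scalars {a, b} materialized as <a, b, a, b> via the reuse mask below.
  std::vector<int> Reuses = {0, 1, 0, 1};
  // New order swaps the two scalars; reordered scalars would read {b, a}.
  std::vector<int> Mask = {1, 0};
  reorderReuses(Reuses, Mask);
  for (int R : Reuses)
    std::printf("%d ", R); // Prints: 1 0 1 0 -> still materializes <b,a,b,a>
  return 0;
}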
@@ -7369,9 +7470,7 @@ struct SLPVectorizer : public FunctionPass {
     initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
   }
 
-  bool doInitialization(Module &M) override {
-    return false;
-  }
+  bool doInitialization(Module &M) override { return false; }
 
   bool runOnFunction(Function &F) override {
     if (skipFunction(F))
@@ -32,21 +32,19 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 
 define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
 ; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT:    [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
-; CHECK-NEXT:    [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
-; CHECK-NEXT:    [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
-; CHECK-NEXT:    [[V0_0:%.*]] = load i64, i64* [[A]], align 8
-; CHECK-NEXT:    [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
-; CHECK-NEXT:    [[V1_0:%.*]] = load i64, i64* [[B]], align 8
-; CHECK-NEXT:    [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
-; CHECK-NEXT:    [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
-; CHECK-NEXT:    [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
-; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
-; CHECK-NEXT:    [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
-; CHECK-NEXT:    [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
-; CHECK-NEXT:    [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
-; CHECK-NEXT:    store i64 [[TMP2_0]], i64* [[C]], align 8
-; CHECK-NEXT:    store i64 [[TMP2_1]], i64* [[C_1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a.0 = getelementptr i64, i64* %a, i64 0
@@ -32,21 +32,19 @@ define <2 x i64> @build_vec_v2i64(<2 x i64> %v0, <2 x i64> %v1) {
 
 define void @store_chain_v2i64(i64* %a, i64* %b, i64* %c) {
 ; CHECK-LABEL: @store_chain_v2i64(
-; CHECK-NEXT:    [[A_1:%.*]] = getelementptr i64, i64* [[A:%.*]], i64 1
-; CHECK-NEXT:    [[B_1:%.*]] = getelementptr i64, i64* [[B:%.*]], i64 1
-; CHECK-NEXT:    [[C_1:%.*]] = getelementptr i64, i64* [[C:%.*]], i64 1
-; CHECK-NEXT:    [[V0_0:%.*]] = load i64, i64* [[A]], align 8
-; CHECK-NEXT:    [[V0_1:%.*]] = load i64, i64* [[A_1]], align 8
-; CHECK-NEXT:    [[V1_0:%.*]] = load i64, i64* [[B]], align 8
-; CHECK-NEXT:    [[V1_1:%.*]] = load i64, i64* [[B_1]], align 8
-; CHECK-NEXT:    [[TMP0_0:%.*]] = add i64 [[V0_0]], [[V1_0]]
-; CHECK-NEXT:    [[TMP0_1:%.*]] = add i64 [[V0_1]], [[V1_1]]
-; CHECK-NEXT:    [[TMP1_0:%.*]] = sub i64 [[V0_0]], [[V1_0]]
-; CHECK-NEXT:    [[TMP1_1:%.*]] = sub i64 [[V0_1]], [[V1_1]]
-; CHECK-NEXT:    [[TMP2_0:%.*]] = add i64 [[TMP0_0]], [[TMP0_1]]
-; CHECK-NEXT:    [[TMP2_1:%.*]] = add i64 [[TMP1_0]], [[TMP1_1]]
-; CHECK-NEXT:    store i64 [[TMP2_0]], i64* [[C]], align 8
-; CHECK-NEXT:    store i64 [[TMP2_1]], i64* [[C_1]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[A:%.*]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[B:%.*]] to <2 x i64>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> [[TMP6]], <2 x i32> <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = sub <2 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i64* [[C:%.*]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* [[TMP12]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %a.0 = getelementptr i64, i64* %a, i64 0
@@ -600,21 +600,18 @@ define void @ChecksExtractScores_different_vectors(double* %storeArray, double*
 ; CHECK-NEXT:    [[LOADVEC4:%.*]] = load <2 x double>, <2 x double>* [[VECPTR4:%.*]], align 4
 ; CHECK-NEXT:    [[EXTRB0:%.*]] = extractelement <2 x double> [[LOADVEC3]], i32 0
 ; CHECK-NEXT:    [[EXTRB1:%.*]] = extractelement <2 x double> [[LOADVEC4]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRB0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRA1]], i32 1
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x double> [[TMP4]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[EXTRB1]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP2]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], [[TMP9]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[EXTRA1]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[EXTRB0]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <2 x double> [[TMP4]], [[TMP2]]
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[EXTRA0]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[EXTRB1]], i32 1
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP2]]
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[SIDX0:%.*]] = getelementptr inbounds double, double* [[STOREARRAY:%.*]], i64 0
 ; CHECK-NEXT:    [[SIDX1:%.*]] = getelementptr inbounds double, double* [[STOREARRAY]], i64 1
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP13]], <2 x double>* [[TMP14]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[SIDX0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %idx0 = getelementptr inbounds double, double* %array, i64 0