Fix V3OrderParallel scoring contraction hang (#6052)

This commit is contained in:
Bartłomiej Chmiel 2025-05-29 16:31:57 +02:00 committed by GitHub
parent 5fcd0e52e7
commit 7dbe4f1807
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 170 additions and 69 deletions

View File

@ -127,7 +127,7 @@ constexpr unsigned PART_SIBLING_EDGE_LIMIT = 26;
// and we probably don't want a huge number of MTasks in practice anyway
// (50 to 100 is typical.)
//
// If the user doesn't give one with '--threads-max-mTaskGraphp', we'll set the
// If the user doesn't give one with '--threads-max-mtasks', we'll set the
// maximum # of MTasks to
// (# of threads * PART_DEFAULT_MAX_MTASKS_PER_THREAD)
constexpr unsigned PART_DEFAULT_MAX_MTASKS_PER_THREAD = 50;
@ -137,7 +137,7 @@ constexpr unsigned PART_DEFAULT_MAX_MTASKS_PER_THREAD = 50;
//######################################################################
// Misc graph and assertion utilities
static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) {
static void partCheckCachedScoreVsActual(uint64_t cached, uint64_t actual) {
#if PART_STEPPED_COST
// Cached CP might be a little bigger than actual, due to stepped CPs.
// Example:
@ -160,8 +160,8 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) {
struct EdgeKey final {
// Note: Structure layout chosen to minimize padding in PairingHeap<*>::Node
uint64_t m_id; // Unique ID part of edge score
uint32_t m_score; // Score part of ID
void increase(uint32_t score) {
uint64_t m_score; // Score part of ID
void increase(uint64_t score) {
UDEBUGONLY(UASSERT(score >= m_score, "Must increase"););
m_score = score;
}
@ -179,7 +179,7 @@ using EdgeHeap = PairingHeap<EdgeKey>;
struct MergeCandidateKey final {
// Note: Structure layout chosen to minimize padding in PairingHeap<*>::Node
uint64_t m_id; // Unique ID part of edge score
uint32_t m_score; // Score part of ID
uint64_t m_score; // Score part of ID
bool operator<(const MergeCandidateKey& other) const {
// First by Score then by ID, but notice that we want minimums using a max-heap, so reverse
return m_score > other.m_score || (m_score == other.m_score && m_id > other.m_id);
@ -222,7 +222,7 @@ public:
bool mergeWouldCreateCycle() const; // Instead of virtual method
inline void rescore();
uint32_t score() const { return m_key.m_score; }
uint64_t score() const { return m_key.m_score; }
static MergeCandidate* heapNodeToElem(MergeCandidateScoreboard::Node* nodep) {
return static_cast<MergeCandidate*>(nodep);
@ -290,7 +290,7 @@ public:
// with updated critical path.
void resetCriticalPaths();
uint32_t cachedCp(GraphWay way) const { return m_edgeHeapNode[way].key().m_score; }
uint64_t cachedCp(GraphWay way) const { return m_edgeHeapNode[way].key().m_score; }
// Convert from the address of the m_edgeHeapNode[way] in an MTaskEdge back to the MTaskEdge
static const MTaskEdge* toMTaskEdge(GraphWay way, const EdgeHeap::Node* nodep) {
@ -327,12 +327,12 @@ private:
// Cost estimate for this LogicMTask, derived from V3InstrCount.
// In abstract time units.
uint32_t m_cost = 0;
uint64_t m_cost = 0;
// Cost of critical paths going FORWARD from graph-start to the start
// of this vertex, and also going REVERSE from the end of the graph to
// the end of the vertex. Same units as m_cost.
std::array<uint32_t, GraphWay::NUM_WAYS> m_critPathCost;
std::array<uint64_t, GraphWay::NUM_WAYS> m_critPathCost;
const uint32_t m_id; // Unique LogicMTask ID number
static uint32_t s_nextId; // Next ID number to use
@ -361,7 +361,7 @@ public:
: V3GraphVertex{graphp}
, m_id{s_nextId++} {
UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mTaskGraphp");
for (uint32_t& item : m_critPathCost) item = 0;
for (uint64_t& item : m_critPathCost) item = 0;
if (mVtxp) {
m_mVertices.linkBack(mVtxp);
if (const OrderLogicVertex* const olvp = mVtxp->logicp()) {
@ -392,10 +392,10 @@ public:
// the final C++ output.
uint32_t id() const { return m_id; }
// Abstract cost of every logic mtask
uint32_t cost() const VL_MT_SAFE { return m_cost; }
void setCost(uint32_t cost) { m_cost = cost; } // For tests only
uint32_t stepCost() const { return stepCost(m_cost); }
static uint32_t stepCost(uint32_t cost) {
uint64_t cost() const VL_MT_SAFE { return m_cost; }
void setCost(uint64_t cost) { m_cost = cost; } // For tests only
uint64_t stepCost() const { return stepCost(m_cost); }
static uint64_t stepCost(uint64_t cost) {
#if PART_STEPPED_COST
// Round cost up to the nearest 5%. Use this when computing all
// critical paths. The idea is that critical path changes don't
@ -410,7 +410,7 @@ public:
logcost = ceil(logcost);
logcost = logcost / 20.0;
const uint32_t stepCost = static_cast<uint32_t>(exp(logcost));
const uint64_t stepCost = static_cast<uint64_t>(exp(logcost));
UDEBUGONLY(UASSERT_STATIC(stepCost >= cost, "stepped cost error exceeded"););
UDEBUGONLY(UASSERT_STATIC(stepCost <= ((cost * 11 / 10)), "stepped cost error exceeded"););
return stepCost;
@ -426,7 +426,7 @@ public:
// Add to the edge heap
LogicMTask* const relativep = edgep->furtherMTaskp<N_Way>();
// Value is !way cp to this edge
const uint32_t cp = relativep->stepCost() + relativep->critPathCost(inv);
const uint64_t cp = relativep->stepCost() + relativep->critPathCost(inv);
//
m_edgeHeap[way].insert(&edgep->m_edgeHeapNode[way], {relativep->id(), cp});
}
@ -462,8 +462,8 @@ public:
for (const V3GraphEdge& edge : edges<N_Way>()) {
const LogicMTask* const relativep
= static_cast<const LogicMTask*>(edge.furtherp<N_Way>());
const uint32_t cachedCp = static_cast<const MTaskEdge&>(edge).cachedCp(way);
const uint32_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost();
const uint64_t cachedCp = static_cast<const MTaskEdge&>(edge).cachedCp(way);
const uint64_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost();
partCheckCachedScoreVsActual(cachedCp, cp);
}
}
@ -477,10 +477,10 @@ public:
return out.str();
}
void setCritPathCost(GraphWay way, uint32_t cost) { m_critPathCost[way] = cost; }
uint32_t critPathCost(GraphWay way) const { return m_critPathCost[way]; }
void setCritPathCost(GraphWay way, uint64_t cost) { m_critPathCost[way] = cost; }
uint64_t critPathCost(GraphWay way) const { return m_critPathCost[way]; }
template <GraphWay::en N_Way>
uint32_t critPathCostWithout(const V3GraphEdge* withoutp) const {
uint64_t critPathCostWithout(const V3GraphEdge* withoutp) const {
const GraphWay way{N_Way};
const GraphWay inv = way.invert();
// Compute the critical path cost wayward to this node, without considering edge
@ -574,7 +574,7 @@ public:
// Follow the entire critical path
std::vector<const LogicMTask*> path;
uint32_t totalCost = 0;
uint64_t totalCost = 0;
for (const LogicMTask* nextp = startp; nextp;) {
path.push_back(nextp);
totalCost += nextp->cost();
@ -624,25 +624,25 @@ bool MergeCandidate::mergeWouldCreateCycle() const {
: static_cast<const MTaskEdge*>(this)->mergeWouldCreateCycle();
}
static uint32_t siblingScore(const SiblingMC* sibsp) {
static uint64_t siblingScore(const SiblingMC* sibsp) {
const LogicMTask* const ap = sibsp->ap();
const LogicMTask* const bp = sibsp->bp();
const uint32_t mergedCpCostFwd
const uint64_t mergedCpCostFwd
= std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD));
const uint32_t mergedCpCostRev
const uint64_t mergedCpCostRev
= std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE));
return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost());
}
static uint32_t edgeScore(const MTaskEdge* edgep) {
static uint64_t edgeScore(const MTaskEdge* edgep) {
// Score this edge. Lower is better. The score is the new local CP
// length if we merge these MTasks. ("Local" means the longest
// critical path running through the merged node.)
const LogicMTask* const top = edgep->toMTaskp();
const LogicMTask* const fromp = edgep->fromMTaskp();
const uint32_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD),
const uint64_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD),
top->critPathCostWithout<GraphWay::FORWARD>(edgep));
const uint32_t mergedCpCostRev = std::max(fromp->critPathCostWithout<GraphWay::REVERSE>(edgep),
const uint64_t mergedCpCostRev = std::max(fromp->critPathCostWithout<GraphWay::REVERSE>(edgep),
top->critPathCost(GraphWay::REVERSE));
return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(fromp->cost() + top->cost());
}
@ -724,7 +724,7 @@ static void partInitHalfCriticalPaths(V3Graph& mTaskGraph, bool checkOnly) {
for (const V3GraphVertex* vertexp; (vertexp = order.nextp());) {
const LogicMTask* const mtaskcp = static_cast<const LogicMTask*>(vertexp);
LogicMTask* const mtaskp = const_cast<LogicMTask*>(mtaskcp);
uint32_t cpCost = 0;
uint64_t cpCost = 0;
#if VL_DEBUG
std::unordered_set<V3GraphVertex*> relatives;
#endif
@ -739,7 +739,7 @@ static void partInitHalfCriticalPaths(V3Graph& mTaskGraph, bool checkOnly) {
#endif
const LogicMTask* const relativep = static_cast<LogicMTask*>(edge.furtherp<rev>());
cpCost = std::max(cpCost, (relativep->critPathCost(way)
+ static_cast<uint32_t>(relativep->stepCost())));
+ static_cast<uint64_t>(relativep->stepCost())));
}
if (checkOnly) {
partCheckCachedScoreVsActual(mtaskp->critPathCost(way), cpCost);
@ -798,8 +798,8 @@ class PropagateCp final {
// We keep pending vertices in a heap during critical path propagation
struct PendingKey final {
LogicMTask* m_mtaskp; // The vertex in the heap
uint32_t m_score; // The score of this entry
void increase(uint32_t score) {
uint64_t m_score; // The score of this entry
void increase(uint64_t score) {
UDEBUGONLY(UASSERT(score >= m_score, "Must increase"););
m_score = score;
}
@ -861,7 +861,7 @@ private:
}
public:
void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) {
void cpHasIncreased(V3GraphVertex* vxp, uint64_t newInclusiveCp) {
constexpr GraphWay way{N_Way};
constexpr GraphWay inv{way.invert()};
@ -877,13 +877,13 @@ public:
relativep->m_edgeHeap[inv].increaseKey(&edgeHeapNode, newInclusiveCp);
}
const uint32_t critPathCost = relativep->critPathCost(way);
const uint64_t critPathCost = relativep->critPathCost(way);
if (critPathCost >= newInclusiveCp) continue;
// relativep's critPathCost() is out of step with its longest !wayward edge.
// Schedule that to be resolved.
const uint32_t newVal = newInclusiveCp - critPathCost;
const uint64_t newVal = newInclusiveCp - critPathCost;
if (PendingHeapNode* const nodep = static_cast<PendingHeapNode*>(relativep->userp())) {
// Already in heap. Increase score if needed.
@ -924,16 +924,16 @@ public:
m_pendingHeap.remove(maxp);
// Pick up values
LogicMTask* const mtaskp = maxp->key().m_mtaskp;
const uint32_t cpGrowBy = maxp->key().m_score;
const uint64_t cpGrowBy = maxp->key().m_score;
// Free the heap node, we are done with it
freeNode(maxp);
mtaskp->userp(nullptr);
// Update the critPathCost of mtaskp, that was out-of-date with respect to its edges
const uint32_t startCp = mtaskp->critPathCost(way);
const uint32_t newCp = startCp + cpGrowBy;
const uint64_t startCp = mtaskp->critPathCost(way);
const uint64_t newCp = startCp + cpGrowBy;
if (VL_UNLIKELY(m_slowAsserts)) {
// Check that CP matches that of the longest edge wayward of vxp.
const uint32_t edgeCp = mtaskp->m_edgeHeap[inv].max()->key().m_score;
const uint64_t edgeCp = mtaskp->m_edgeHeap[inv].max()->key().m_score;
UASSERT_OBJ(edgeCp == newCp, mtaskp, "CP doesn't match longest wayward edge");
// Confirm that we only set each node's CP once. That's an
// important property of PropagateCp which allows it to be far
@ -1114,15 +1114,17 @@ class Contraction final {
// TYPES
// New CP information for mtaskp reflecting an upcoming merge
struct NewCp final {
uint32_t cp;
uint32_t propagateCp;
uint64_t cp;
uint64_t propagateCp;
bool propagate;
};
// MEMBERS
V3Graph& m_mTaskGraph; // The Mtask graph
uint32_t m_scoreLimit; // Sloppy score allowed when picking merges
uint32_t m_scoreLimitBeforeRescore = 0xffffffff; // Next score rescore at
uint64_t m_scoreLimit; // Sloppy score allowed when picking merges
uint64_t m_scoreLimitBeforeRescore
= std::numeric_limits<decltype(m_scoreLimitBeforeRescore)>::max(); // Next score rescore
// at
unsigned m_mergesSinceRescore = 0; // Merges since last rescore
const bool m_slowAsserts; // Take extra time to validate algorithm
MergeCandidateScoreboard m_sb; // Scoreboard
@ -1135,7 +1137,7 @@ class Contraction final {
public:
// CONSTRUCTORS
Contraction(V3Graph& mTaskGraph, uint32_t scoreLimit, LogicMTask* entryMTaskp,
Contraction(V3Graph& mTaskGraph, uint64_t scoreLimit, LogicMTask* entryMTaskp,
LogicMTask* exitMTaskp, bool slowAsserts)
: m_mTaskGraph{mTaskGraph}
, m_scoreLimit{scoreLimit}
@ -1199,9 +1201,9 @@ public:
UASSERT(!m_sb.needsRescore(mergeCanp),
"Need-rescore items should not be returned by bestp");
}
const uint32_t cachedScore = mergeCanp->score();
const uint64_t cachedScore = mergeCanp->score();
mergeCanp->rescore();
const uint32_t actualScore = mergeCanp->score();
const uint64_t actualScore = mergeCanp->score();
if (actualScore > cachedScore) {
// Cached score is out-of-date.
@ -1226,12 +1228,16 @@ public:
// limit and keep going...
const unsigned mtaskCount = m_mTaskGraph.vertices().size();
if (mtaskCount > maxMTasks) {
const uint32_t oldLimit = m_scoreLimit;
const uint64_t oldLimit = m_scoreLimit;
m_scoreLimit = (m_scoreLimit * 120) / 100;
v3Global.rootp()->fileline()->v3warn(
UNOPTTHREADS, "Thread scheduler is unable to provide requested "
"parallelism; suggest asking for fewer threads.");
UINFO(1,
FileLine* const flp = v3Global.rootp()->fileline();
if (!flp->warnIsOff(V3ErrorCode::UNOPTTHREADS)) {
flp->v3warn(UNOPTTHREADS,
"Thread scheduler is unable to provide requested "
"parallelism; suggest asking for fewer threads.");
flp->modifyWarnOff(V3ErrorCode::UNOPTTHREADS, true);
}
UINFO(6,
"Critical path limit was=" << oldLimit << " now=" << m_scoreLimit);
continue;
}
@ -1322,7 +1328,7 @@ private:
// Return new wayward-CP for mtaskp reflecting its upcoming merge
// with otherp. Set 'result.propagate' if mtaskp's wayward
// relatives will see a new wayward CP from this merge.
uint32_t newCp;
uint64_t newCp;
if (mergeEdgep) {
if (mtaskp == mergeEdgep->furtherp<way>()) {
newCp = std::max(otherp->critPathCost(way),
@ -1335,8 +1341,8 @@ private:
newCp = std::max(otherp->critPathCost(way), mtaskp->critPathCost(way));
}
const uint32_t origRelativesCp = mtaskp->critPathCost(way) + mtaskp->stepCost();
const uint32_t newRelativesCp
const uint64_t origRelativesCp = mtaskp->critPathCost(way) + mtaskp->stepCost();
const uint64_t newRelativesCp
= newCp + LogicMTask::stepCost(mtaskp->cost() + otherp->cost());
NewCp result;
@ -1506,7 +1512,8 @@ private:
UINFO(6, "Did rescore. Merges since previous = " << m_mergesSinceRescore);
m_mergesSinceRescore = 0;
m_scoreLimitBeforeRescore = 0xffffffff;
m_scoreLimitBeforeRescore
= std::numeric_limits<decltype(m_scoreLimitBeforeRescore)>::max();
}
void makeSiblingMC(LogicMTask* ap, LogicMTask* bp) {
@ -1545,8 +1552,8 @@ private:
// functions are efficient enough and using more optimized methods (e.g.: sorting networks)
// has no measurable benefit.
struct alignas(16) SortingRecord final {
uint64_t m_id;
uint32_t m_cp;
uint64_t m_cp;
uint32_t m_id;
uint8_t m_idx;
static_assert(PART_SIBLING_EDGE_LIMIT <= std::numeric_limits<uint8_t>::max(),
"m_idx must fit all indices into 'neighbors'");
@ -1689,7 +1696,7 @@ public:
selfTestChain();
}
static void apply(V3Graph& mTaskGraph, uint32_t scoreLimit, LogicMTask* entryMTaskp,
static void apply(V3Graph& mTaskGraph, uint64_t scoreLimit, LogicMTask* entryMTaskp,
LogicMTask* exitMTaskp, bool slowAsserts) {
Contraction{mTaskGraph, scoreLimit, entryMTaskp, exitMTaskp, slowAsserts};
}
@ -2056,18 +2063,18 @@ static void debugMTaskGraphStats(V3Graph& graph, const string& stage) {
UINFO(4, "\n");
UINFO(4, " Stats for " << stage);
uint32_t mtaskCount = 0;
uint32_t totalCost = 0;
std::array<uint32_t, 32> mtaskCostHist;
mtaskCostHist.fill(0);
uint64_t mtaskCount = 0;
uint64_t totalCost = 0;
constexpr int scoreBits = std::numeric_limits<uint64_t>::digits;
std::array<uint64_t, scoreBits> mtaskCostHist{};
for (const V3GraphVertex& mtask : graph.vertices()) {
++mtaskCount;
uint32_t mtaskCost = mtask.as<const LogicMTask>()->cost();
uint64_t mtaskCost = mtask.as<const LogicMTask>()->cost();
totalCost += mtaskCost;
unsigned log2Cost = 0;
while (mtaskCost >>= 1) ++log2Cost;
UASSERT(log2Cost < 32, "log2Cost overflow in debugMTaskGraphStats");
UASSERT(log2Cost < scoreBits, "log2Cost overflow in debugMTaskGraphStats");
++mtaskCostHist[log2Cost];
}
UINFO(4, " Total mtask cost = " << totalCost);
@ -2075,7 +2082,7 @@ static void debugMTaskGraphStats(V3Graph& graph, const string& stage) {
UINFO(4, " Avg cost / mtask = " << ((mtaskCount > 0) ? cvtToStr(totalCost / mtaskCount)
: "INF!"));
UINFO(4, " Histogram of mtask costs:");
for (unsigned i = 0; i < 32; ++i) {
for (unsigned i = 0; i < scoreBits; ++i) {
if (mtaskCostHist[i]) {
UINFO(4, " 2^" << i << ": " << mtaskCostHist[i]);
V3Stats::addStat("MTask graph, " + stage + ", mtask cost 2^" + (i < 10 ? " " : "")
@ -2185,8 +2192,8 @@ class Partitioner final {
return fanIn + fanOut == 4;
}
uint32_t setupMTaskDeps() VL_MT_DISABLED {
uint32_t totalGraphCost = 0;
uint64_t setupMTaskDeps() VL_MT_DISABLED {
uint64_t totalGraphCost = 0;
// Artificial single entry point vertex in the MTask graph to allow sibling merges.
// This is required as otherwise disjoint sub-graphs could not be merged, but the
@ -2280,7 +2287,7 @@ class Partitioner final {
// OrderMoveVertex. Over time, we'll merge MTasks together and
// eventually each MTask will wrap a large number of MTaskMoveVertices
// (and the logic nodes therein.)
const uint32_t totalGraphCost = setupMTaskDeps();
const uint64_t totalGraphCost = setupMTaskDeps();
debugMTaskGraphStats(*m_mTaskGraphp, "initial");
@ -2328,7 +2335,7 @@ class Partitioner final {
// when scheduling them.
const unsigned fudgeNumerator = 3;
const unsigned fudgeDenominator = 5;
const uint32_t cpLimit
const uint64_t cpLimit
= ((totalGraphCost * fudgeNumerator) / (targetParFactor * fudgeDenominator));
UINFO(4, "Partitioner set cpLimit = " << cpLimit);

View File

@ -0,0 +1,12 @@
// -*- mode: C++; c-file-style: "cc-mode" -*-
//*************************************************************************
//
// Copyright 2025 by Antmicro. This program is free software; you can
// redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//*************************************************************************
// Deliberately empty function with C linkage. It takes no arguments, returns
// nothing, and has no side effects; it exists only so that a symbol named
// dpii_call is defined for code that links against it.
extern "C" {
void dpii_call() {}
}

View File

@ -0,0 +1,27 @@
#!/usr/bin/env python3
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2025 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
import vltest_bootstrap
# Scenario selection and object cleanup happen before anything is compiled.
test.scenarios('vltmt')
test.clean_objs()

# Extra C++ source passed to the compile step via v_flags2.
dpi_sources = ["t/t_instr_count_dpi_large.cpp"]

verilator_options = [
    # Assign a very large instruction count to every DPI call ...
    "--instr-count-dpi 999999999",
    # ... and force the UNOPTTHREADS condition so that the Contraction score
    # limit is increased beyond the UINT32 range.
    "--threads-max-mtasks 1",
    # The warning itself is expected here, so it is switched off.
    "-Wno-UNOPTTHREADS",
]

test.compile(v_flags2=dpi_sources, verilator_flags2=verilator_options, threads=2)

test.execute()

test.passes()

View File

@ -0,0 +1,26 @@
// DESCRIPTION: Verilator: Verilog Test module
//
// This file ONLY is placed under the Creative Commons Public Domain, for
// any use, without warranty, 2025 by Wilson Snyder.
// SPDX-License-Identifier: CC0-1.0
// Top-level test module: instantiates one sub_0 and one sub_1, both driven by
// the same clock input, then prints the finish banner and ends simulation
// from an initial block.
module t(input clk);
   sub_0 sub_0(.clk(clk));
   sub_1 sub_1(.clk(clk));

   initial begin
      $write("*-* All Finished *-*\n");
      $finish;
   end
endmodule
// DPI-C import declared outside any module: a context function taking no
// arguments and returning void. Called from sub_0 and sub_1 below.
import "DPI-C" context function void dpii_call();
// Calls the dpii_call DPI import on every rising edge of clk. The metacomment
// after the port list must stay exactly where it is.
module sub_0(input clk); /*verilator hier_block*/
   always @(posedge clk) begin
      dpii_call();
   end
endmodule
// Calls the dpii_call DPI import on every rising edge of clk. The metacomment
// after the port list must stay exactly where it is.
module sub_1(input clk); /*verilator hier_block*/
   always @(posedge clk) begin
      dpii_call();
   end
endmodule

View File

@ -0,0 +1,29 @@
#!/usr/bin/env python3
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2025 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
import vltest_bootstrap
# Scenario selection and object cleanup happen before anything is compiled.
test.scenarios('vltmt')
test.clean_objs()

# The Verilog top file is set explicitly for this test.
test.top_filename = "t/t_instr_count_dpi_large.v"

# Extra C++ source passed to the compile step via v_flags2.
dpi_sources = ["t/t_instr_count_dpi_large.cpp"]

verilator_options = [
    "--hierarchical",
    # Assign a very large instruction count to every DPI call ...
    "--instr-count-dpi 999999999",
    # ... and force the UNOPTTHREADS condition so that the Contraction score
    # limit is increased beyond the UINT32 range.
    "--threads-max-mtasks 1",
    # The warning itself is expected here, so it is switched off.
    "-Wno-UNOPTTHREADS",
]

test.compile(v_flags2=dpi_sources, verilator_flags2=verilator_options, threads=2)

test.execute()

test.passes()