Fix V3OrderParallel scoring contraction hang (#6052)
This commit is contained in:
parent
5fcd0e52e7
commit
7dbe4f1807
|
@ -127,7 +127,7 @@ constexpr unsigned PART_SIBLING_EDGE_LIMIT = 26;
|
|||
// and we probably don't want a huge number of MTasks in practice anyway
|
||||
// (50 to 100 is typical.)
|
||||
//
|
||||
// If the user doesn't give one with '--threads-max-mTaskGraphp', we'll set the
|
||||
// If the user doesn't give one with '--threads-max-mtasks', we'll set the
|
||||
// maximum # of MTasks to
|
||||
// (# of threads * PART_DEFAULT_MAX_MTASKS_PER_THREAD)
|
||||
constexpr unsigned PART_DEFAULT_MAX_MTASKS_PER_THREAD = 50;
|
||||
|
@ -137,7 +137,7 @@ constexpr unsigned PART_DEFAULT_MAX_MTASKS_PER_THREAD = 50;
|
|||
//######################################################################
|
||||
// Misc graph and assertion utilities
|
||||
|
||||
static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) {
|
||||
static void partCheckCachedScoreVsActual(uint64_t cached, uint64_t actual) {
|
||||
#if PART_STEPPED_COST
|
||||
// Cached CP might be a little bigger than actual, due to stepped CPs.
|
||||
// Example:
|
||||
|
@ -160,8 +160,8 @@ static void partCheckCachedScoreVsActual(uint32_t cached, uint32_t actual) {
|
|||
struct EdgeKey final {
|
||||
// Note: Structure layout chosen to minimize padding in PairingHeap<*>::Node
|
||||
uint64_t m_id; // Unique ID part of edge score
|
||||
uint32_t m_score; // Score part of ID
|
||||
void increase(uint32_t score) {
|
||||
uint64_t m_score; // Score part of ID
|
||||
void increase(uint64_t score) {
|
||||
UDEBUGONLY(UASSERT(score >= m_score, "Must increase"););
|
||||
m_score = score;
|
||||
}
|
||||
|
@ -179,7 +179,7 @@ using EdgeHeap = PairingHeap<EdgeKey>;
|
|||
struct MergeCandidateKey final {
|
||||
// Note: Structure layout chosen to minimize padding in PairingHeap<*>::Node
|
||||
uint64_t m_id; // Unique ID part of edge score
|
||||
uint32_t m_score; // Score part of ID
|
||||
uint64_t m_score; // Score part of ID
|
||||
bool operator<(const MergeCandidateKey& other) const {
|
||||
// First by Score then by ID, but notice that we want minimums using a max-heap, so reverse
|
||||
return m_score > other.m_score || (m_score == other.m_score && m_id > other.m_id);
|
||||
|
@ -222,7 +222,7 @@ public:
|
|||
bool mergeWouldCreateCycle() const; // Instead of virtual method
|
||||
|
||||
inline void rescore();
|
||||
uint32_t score() const { return m_key.m_score; }
|
||||
uint64_t score() const { return m_key.m_score; }
|
||||
|
||||
static MergeCandidate* heapNodeToElem(MergeCandidateScoreboard::Node* nodep) {
|
||||
return static_cast<MergeCandidate*>(nodep);
|
||||
|
@ -290,7 +290,7 @@ public:
|
|||
// with updated critical path.
|
||||
void resetCriticalPaths();
|
||||
|
||||
uint32_t cachedCp(GraphWay way) const { return m_edgeHeapNode[way].key().m_score; }
|
||||
uint64_t cachedCp(GraphWay way) const { return m_edgeHeapNode[way].key().m_score; }
|
||||
|
||||
// Convert from the address of the m_edgeHeapNode[way] in an MTaskEdge back to the MTaskEdge
|
||||
static const MTaskEdge* toMTaskEdge(GraphWay way, const EdgeHeap::Node* nodep) {
|
||||
|
@ -327,12 +327,12 @@ private:
|
|||
|
||||
// Cost estimate for this LogicMTask, derived from V3InstrCount.
|
||||
// In abstract time units.
|
||||
uint32_t m_cost = 0;
|
||||
uint64_t m_cost = 0;
|
||||
|
||||
// Cost of critical paths going FORWARD from graph-start to the start
|
||||
// of this vertex, and also going REVERSE from the end of the graph to
|
||||
// the end of the vertex. Same units as m_cost.
|
||||
std::array<uint32_t, GraphWay::NUM_WAYS> m_critPathCost;
|
||||
std::array<uint64_t, GraphWay::NUM_WAYS> m_critPathCost;
|
||||
|
||||
const uint32_t m_id; // Unique LogicMTask ID number
|
||||
static uint32_t s_nextId; // Next ID number to use
|
||||
|
@ -361,7 +361,7 @@ public:
|
|||
: V3GraphVertex{graphp}
|
||||
, m_id{s_nextId++} {
|
||||
UASSERT(s_nextId < 0xFFFFFFFFUL, "Too many mTaskGraphp");
|
||||
for (uint32_t& item : m_critPathCost) item = 0;
|
||||
for (uint64_t& item : m_critPathCost) item = 0;
|
||||
if (mVtxp) {
|
||||
m_mVertices.linkBack(mVtxp);
|
||||
if (const OrderLogicVertex* const olvp = mVtxp->logicp()) {
|
||||
|
@ -392,10 +392,10 @@ public:
|
|||
// the final C++ output.
|
||||
uint32_t id() const { return m_id; }
|
||||
// Abstract cost of every logic mtask
|
||||
uint32_t cost() const VL_MT_SAFE { return m_cost; }
|
||||
void setCost(uint32_t cost) { m_cost = cost; } // For tests only
|
||||
uint32_t stepCost() const { return stepCost(m_cost); }
|
||||
static uint32_t stepCost(uint32_t cost) {
|
||||
uint64_t cost() const VL_MT_SAFE { return m_cost; }
|
||||
void setCost(uint64_t cost) { m_cost = cost; } // For tests only
|
||||
uint64_t stepCost() const { return stepCost(m_cost); }
|
||||
static uint64_t stepCost(uint64_t cost) {
|
||||
#if PART_STEPPED_COST
|
||||
// Round cost up to the nearest 5%. Use this when computing all
|
||||
// critical paths. The idea is that critical path changes don't
|
||||
|
@ -410,7 +410,7 @@ public:
|
|||
logcost = ceil(logcost);
|
||||
logcost = logcost / 20.0;
|
||||
|
||||
const uint32_t stepCost = static_cast<uint32_t>(exp(logcost));
|
||||
const uint64_t stepCost = static_cast<uint64_t>(exp(logcost));
|
||||
UDEBUGONLY(UASSERT_STATIC(stepCost >= cost, "stepped cost error exceeded"););
|
||||
UDEBUGONLY(UASSERT_STATIC(stepCost <= ((cost * 11 / 10)), "stepped cost error exceeded"););
|
||||
return stepCost;
|
||||
|
@ -426,7 +426,7 @@ public:
|
|||
// Add to the edge heap
|
||||
LogicMTask* const relativep = edgep->furtherMTaskp<N_Way>();
|
||||
// Value is !way cp to this edge
|
||||
const uint32_t cp = relativep->stepCost() + relativep->critPathCost(inv);
|
||||
const uint64_t cp = relativep->stepCost() + relativep->critPathCost(inv);
|
||||
//
|
||||
m_edgeHeap[way].insert(&edgep->m_edgeHeapNode[way], {relativep->id(), cp});
|
||||
}
|
||||
|
@ -462,8 +462,8 @@ public:
|
|||
for (const V3GraphEdge& edge : edges<N_Way>()) {
|
||||
const LogicMTask* const relativep
|
||||
= static_cast<const LogicMTask*>(edge.furtherp<N_Way>());
|
||||
const uint32_t cachedCp = static_cast<const MTaskEdge&>(edge).cachedCp(way);
|
||||
const uint32_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost();
|
||||
const uint64_t cachedCp = static_cast<const MTaskEdge&>(edge).cachedCp(way);
|
||||
const uint64_t cp = relativep->critPathCost(way.invert()) + relativep->stepCost();
|
||||
partCheckCachedScoreVsActual(cachedCp, cp);
|
||||
}
|
||||
}
|
||||
|
@ -477,10 +477,10 @@ public:
|
|||
return out.str();
|
||||
}
|
||||
|
||||
void setCritPathCost(GraphWay way, uint32_t cost) { m_critPathCost[way] = cost; }
|
||||
uint32_t critPathCost(GraphWay way) const { return m_critPathCost[way]; }
|
||||
void setCritPathCost(GraphWay way, uint64_t cost) { m_critPathCost[way] = cost; }
|
||||
uint64_t critPathCost(GraphWay way) const { return m_critPathCost[way]; }
|
||||
template <GraphWay::en N_Way>
|
||||
uint32_t critPathCostWithout(const V3GraphEdge* withoutp) const {
|
||||
uint64_t critPathCostWithout(const V3GraphEdge* withoutp) const {
|
||||
const GraphWay way{N_Way};
|
||||
const GraphWay inv = way.invert();
|
||||
// Compute the critical path cost wayward to this node, without considering edge
|
||||
|
@ -574,7 +574,7 @@ public:
|
|||
|
||||
// Follow the entire critical path
|
||||
std::vector<const LogicMTask*> path;
|
||||
uint32_t totalCost = 0;
|
||||
uint64_t totalCost = 0;
|
||||
for (const LogicMTask* nextp = startp; nextp;) {
|
||||
path.push_back(nextp);
|
||||
totalCost += nextp->cost();
|
||||
|
@ -624,25 +624,25 @@ bool MergeCandidate::mergeWouldCreateCycle() const {
|
|||
: static_cast<const MTaskEdge*>(this)->mergeWouldCreateCycle();
|
||||
}
|
||||
|
||||
static uint32_t siblingScore(const SiblingMC* sibsp) {
|
||||
static uint64_t siblingScore(const SiblingMC* sibsp) {
|
||||
const LogicMTask* const ap = sibsp->ap();
|
||||
const LogicMTask* const bp = sibsp->bp();
|
||||
const uint32_t mergedCpCostFwd
|
||||
const uint64_t mergedCpCostFwd
|
||||
= std::max(ap->critPathCost(GraphWay::FORWARD), bp->critPathCost(GraphWay::FORWARD));
|
||||
const uint32_t mergedCpCostRev
|
||||
const uint64_t mergedCpCostRev
|
||||
= std::max(ap->critPathCost(GraphWay::REVERSE), bp->critPathCost(GraphWay::REVERSE));
|
||||
return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(ap->cost() + bp->cost());
|
||||
}
|
||||
|
||||
static uint32_t edgeScore(const MTaskEdge* edgep) {
|
||||
static uint64_t edgeScore(const MTaskEdge* edgep) {
|
||||
// Score this edge. Lower is better. The score is the new local CP
|
||||
// length if we merge these MTasks. ("Local" means the longest
|
||||
// critical path running through the merged node.)
|
||||
const LogicMTask* const top = edgep->toMTaskp();
|
||||
const LogicMTask* const fromp = edgep->fromMTaskp();
|
||||
const uint32_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD),
|
||||
const uint64_t mergedCpCostFwd = std::max(fromp->critPathCost(GraphWay::FORWARD),
|
||||
top->critPathCostWithout<GraphWay::FORWARD>(edgep));
|
||||
const uint32_t mergedCpCostRev = std::max(fromp->critPathCostWithout<GraphWay::REVERSE>(edgep),
|
||||
const uint64_t mergedCpCostRev = std::max(fromp->critPathCostWithout<GraphWay::REVERSE>(edgep),
|
||||
top->critPathCost(GraphWay::REVERSE));
|
||||
return mergedCpCostRev + mergedCpCostFwd + LogicMTask::stepCost(fromp->cost() + top->cost());
|
||||
}
|
||||
|
@ -724,7 +724,7 @@ static void partInitHalfCriticalPaths(V3Graph& mTaskGraph, bool checkOnly) {
|
|||
for (const V3GraphVertex* vertexp; (vertexp = order.nextp());) {
|
||||
const LogicMTask* const mtaskcp = static_cast<const LogicMTask*>(vertexp);
|
||||
LogicMTask* const mtaskp = const_cast<LogicMTask*>(mtaskcp);
|
||||
uint32_t cpCost = 0;
|
||||
uint64_t cpCost = 0;
|
||||
#if VL_DEBUG
|
||||
std::unordered_set<V3GraphVertex*> relatives;
|
||||
#endif
|
||||
|
@ -739,7 +739,7 @@ static void partInitHalfCriticalPaths(V3Graph& mTaskGraph, bool checkOnly) {
|
|||
#endif
|
||||
const LogicMTask* const relativep = static_cast<LogicMTask*>(edge.furtherp<rev>());
|
||||
cpCost = std::max(cpCost, (relativep->critPathCost(way)
|
||||
+ static_cast<uint32_t>(relativep->stepCost())));
|
||||
+ static_cast<uint64_t>(relativep->stepCost())));
|
||||
}
|
||||
if (checkOnly) {
|
||||
partCheckCachedScoreVsActual(mtaskp->critPathCost(way), cpCost);
|
||||
|
@ -798,8 +798,8 @@ class PropagateCp final {
|
|||
// We keep pending vertices in a heap during critical path propagation
|
||||
struct PendingKey final {
|
||||
LogicMTask* m_mtaskp; // The vertex in the heap
|
||||
uint32_t m_score; // The score of this entry
|
||||
void increase(uint32_t score) {
|
||||
uint64_t m_score; // The score of this entry
|
||||
void increase(uint64_t score) {
|
||||
UDEBUGONLY(UASSERT(score >= m_score, "Must increase"););
|
||||
m_score = score;
|
||||
}
|
||||
|
@ -861,7 +861,7 @@ private:
|
|||
}
|
||||
|
||||
public:
|
||||
void cpHasIncreased(V3GraphVertex* vxp, uint32_t newInclusiveCp) {
|
||||
void cpHasIncreased(V3GraphVertex* vxp, uint64_t newInclusiveCp) {
|
||||
constexpr GraphWay way{N_Way};
|
||||
constexpr GraphWay inv{way.invert()};
|
||||
|
||||
|
@ -877,13 +877,13 @@ public:
|
|||
relativep->m_edgeHeap[inv].increaseKey(&edgeHeapNode, newInclusiveCp);
|
||||
}
|
||||
|
||||
const uint32_t critPathCost = relativep->critPathCost(way);
|
||||
const uint64_t critPathCost = relativep->critPathCost(way);
|
||||
|
||||
if (critPathCost >= newInclusiveCp) continue;
|
||||
|
||||
// relativep's critPathCost() is out of step with its longest !wayward edge.
|
||||
// Schedule that to be resolved.
|
||||
const uint32_t newVal = newInclusiveCp - critPathCost;
|
||||
const uint64_t newVal = newInclusiveCp - critPathCost;
|
||||
|
||||
if (PendingHeapNode* const nodep = static_cast<PendingHeapNode*>(relativep->userp())) {
|
||||
// Already in heap. Increase score if needed.
|
||||
|
@ -924,16 +924,16 @@ public:
|
|||
m_pendingHeap.remove(maxp);
|
||||
// Pick up values
|
||||
LogicMTask* const mtaskp = maxp->key().m_mtaskp;
|
||||
const uint32_t cpGrowBy = maxp->key().m_score;
|
||||
const uint64_t cpGrowBy = maxp->key().m_score;
|
||||
// Free the heap node, we are done with it
|
||||
freeNode(maxp);
|
||||
mtaskp->userp(nullptr);
|
||||
// Update the critPathCost of mtaskp, that was out-of-date with respect to its edges
|
||||
const uint32_t startCp = mtaskp->critPathCost(way);
|
||||
const uint32_t newCp = startCp + cpGrowBy;
|
||||
const uint64_t startCp = mtaskp->critPathCost(way);
|
||||
const uint64_t newCp = startCp + cpGrowBy;
|
||||
if (VL_UNLIKELY(m_slowAsserts)) {
|
||||
// Check that CP matches that of the longest edge wayward of vxp.
|
||||
const uint32_t edgeCp = mtaskp->m_edgeHeap[inv].max()->key().m_score;
|
||||
const uint64_t edgeCp = mtaskp->m_edgeHeap[inv].max()->key().m_score;
|
||||
UASSERT_OBJ(edgeCp == newCp, mtaskp, "CP doesn't match longest wayward edge");
|
||||
// Confirm that we only set each node's CP once. That's an
|
||||
// important property of PropagateCp which allows it to be far
|
||||
|
@ -1114,15 +1114,17 @@ class Contraction final {
|
|||
// TYPES
|
||||
// New CP information for mtaskp reflecting an upcoming merge
|
||||
struct NewCp final {
|
||||
uint32_t cp;
|
||||
uint32_t propagateCp;
|
||||
uint64_t cp;
|
||||
uint64_t propagateCp;
|
||||
bool propagate;
|
||||
};
|
||||
|
||||
// MEMBERS
|
||||
V3Graph& m_mTaskGraph; // The Mtask graph
|
||||
uint32_t m_scoreLimit; // Sloppy score allowed when picking merges
|
||||
uint32_t m_scoreLimitBeforeRescore = 0xffffffff; // Next score rescore at
|
||||
uint64_t m_scoreLimit; // Sloppy score allowed when picking merges
|
||||
uint64_t m_scoreLimitBeforeRescore
|
||||
= std::numeric_limits<decltype(m_scoreLimitBeforeRescore)>::max(); // Next score rescore
|
||||
// at
|
||||
unsigned m_mergesSinceRescore = 0; // Merges since last rescore
|
||||
const bool m_slowAsserts; // Take extra time to validate algorithm
|
||||
MergeCandidateScoreboard m_sb; // Scoreboard
|
||||
|
@ -1135,7 +1137,7 @@ class Contraction final {
|
|||
|
||||
public:
|
||||
// CONSTRUCTORS
|
||||
Contraction(V3Graph& mTaskGraph, uint32_t scoreLimit, LogicMTask* entryMTaskp,
|
||||
Contraction(V3Graph& mTaskGraph, uint64_t scoreLimit, LogicMTask* entryMTaskp,
|
||||
LogicMTask* exitMTaskp, bool slowAsserts)
|
||||
: m_mTaskGraph{mTaskGraph}
|
||||
, m_scoreLimit{scoreLimit}
|
||||
|
@ -1199,9 +1201,9 @@ public:
|
|||
UASSERT(!m_sb.needsRescore(mergeCanp),
|
||||
"Need-rescore items should not be returned by bestp");
|
||||
}
|
||||
const uint32_t cachedScore = mergeCanp->score();
|
||||
const uint64_t cachedScore = mergeCanp->score();
|
||||
mergeCanp->rescore();
|
||||
const uint32_t actualScore = mergeCanp->score();
|
||||
const uint64_t actualScore = mergeCanp->score();
|
||||
|
||||
if (actualScore > cachedScore) {
|
||||
// Cached score is out-of-date.
|
||||
|
@ -1226,12 +1228,16 @@ public:
|
|||
// limit and keep going...
|
||||
const unsigned mtaskCount = m_mTaskGraph.vertices().size();
|
||||
if (mtaskCount > maxMTasks) {
|
||||
const uint32_t oldLimit = m_scoreLimit;
|
||||
const uint64_t oldLimit = m_scoreLimit;
|
||||
m_scoreLimit = (m_scoreLimit * 120) / 100;
|
||||
v3Global.rootp()->fileline()->v3warn(
|
||||
UNOPTTHREADS, "Thread scheduler is unable to provide requested "
|
||||
"parallelism; suggest asking for fewer threads.");
|
||||
UINFO(1,
|
||||
FileLine* const flp = v3Global.rootp()->fileline();
|
||||
if (!flp->warnIsOff(V3ErrorCode::UNOPTTHREADS)) {
|
||||
flp->v3warn(UNOPTTHREADS,
|
||||
"Thread scheduler is unable to provide requested "
|
||||
"parallelism; suggest asking for fewer threads.");
|
||||
flp->modifyWarnOff(V3ErrorCode::UNOPTTHREADS, true);
|
||||
}
|
||||
UINFO(6,
|
||||
"Critical path limit was=" << oldLimit << " now=" << m_scoreLimit);
|
||||
continue;
|
||||
}
|
||||
|
@ -1322,7 +1328,7 @@ private:
|
|||
// Return new wayward-CP for mtaskp reflecting its upcoming merge
|
||||
// with otherp. Set 'result.propagate' if mtaskp's wayward
|
||||
// relatives will see a new wayward CP from this merge.
|
||||
uint32_t newCp;
|
||||
uint64_t newCp;
|
||||
if (mergeEdgep) {
|
||||
if (mtaskp == mergeEdgep->furtherp<way>()) {
|
||||
newCp = std::max(otherp->critPathCost(way),
|
||||
|
@ -1335,8 +1341,8 @@ private:
|
|||
newCp = std::max(otherp->critPathCost(way), mtaskp->critPathCost(way));
|
||||
}
|
||||
|
||||
const uint32_t origRelativesCp = mtaskp->critPathCost(way) + mtaskp->stepCost();
|
||||
const uint32_t newRelativesCp
|
||||
const uint64_t origRelativesCp = mtaskp->critPathCost(way) + mtaskp->stepCost();
|
||||
const uint64_t newRelativesCp
|
||||
= newCp + LogicMTask::stepCost(mtaskp->cost() + otherp->cost());
|
||||
|
||||
NewCp result;
|
||||
|
@ -1506,7 +1512,8 @@ private:
|
|||
UINFO(6, "Did rescore. Merges since previous = " << m_mergesSinceRescore);
|
||||
|
||||
m_mergesSinceRescore = 0;
|
||||
m_scoreLimitBeforeRescore = 0xffffffff;
|
||||
m_scoreLimitBeforeRescore
|
||||
= std::numeric_limits<decltype(m_scoreLimitBeforeRescore)>::max();
|
||||
}
|
||||
|
||||
void makeSiblingMC(LogicMTask* ap, LogicMTask* bp) {
|
||||
|
@ -1545,8 +1552,8 @@ private:
|
|||
// functions are efficient enough and using more optimized methods (e.g.: sorting networks)
|
||||
// has no measurable benefit.
|
||||
struct alignas(16) SortingRecord final {
|
||||
uint64_t m_id;
|
||||
uint32_t m_cp;
|
||||
uint64_t m_cp;
|
||||
uint32_t m_id;
|
||||
uint8_t m_idx;
|
||||
static_assert(PART_SIBLING_EDGE_LIMIT <= std::numeric_limits<uint8_t>::max(),
|
||||
"m_idx must fit all indices into 'neighbors'");
|
||||
|
@ -1689,7 +1696,7 @@ public:
|
|||
selfTestChain();
|
||||
}
|
||||
|
||||
static void apply(V3Graph& mTaskGraph, uint32_t scoreLimit, LogicMTask* entryMTaskp,
|
||||
static void apply(V3Graph& mTaskGraph, uint64_t scoreLimit, LogicMTask* entryMTaskp,
|
||||
LogicMTask* exitMTaskp, bool slowAsserts) {
|
||||
Contraction{mTaskGraph, scoreLimit, entryMTaskp, exitMTaskp, slowAsserts};
|
||||
}
|
||||
|
@ -2056,18 +2063,18 @@ static void debugMTaskGraphStats(V3Graph& graph, const string& stage) {
|
|||
|
||||
UINFO(4, "\n");
|
||||
UINFO(4, " Stats for " << stage);
|
||||
uint32_t mtaskCount = 0;
|
||||
uint32_t totalCost = 0;
|
||||
std::array<uint32_t, 32> mtaskCostHist;
|
||||
mtaskCostHist.fill(0);
|
||||
uint64_t mtaskCount = 0;
|
||||
uint64_t totalCost = 0;
|
||||
constexpr int scoreBits = std::numeric_limits<uint64_t>::digits;
|
||||
std::array<uint64_t, scoreBits> mtaskCostHist{};
|
||||
for (const V3GraphVertex& mtask : graph.vertices()) {
|
||||
++mtaskCount;
|
||||
uint32_t mtaskCost = mtask.as<const LogicMTask>()->cost();
|
||||
uint64_t mtaskCost = mtask.as<const LogicMTask>()->cost();
|
||||
totalCost += mtaskCost;
|
||||
|
||||
unsigned log2Cost = 0;
|
||||
while (mtaskCost >>= 1) ++log2Cost;
|
||||
UASSERT(log2Cost < 32, "log2Cost overflow in debugMTaskGraphStats");
|
||||
UASSERT(log2Cost < scoreBits, "log2Cost overflow in debugMTaskGraphStats");
|
||||
++mtaskCostHist[log2Cost];
|
||||
}
|
||||
UINFO(4, " Total mtask cost = " << totalCost);
|
||||
|
@ -2075,7 +2082,7 @@ static void debugMTaskGraphStats(V3Graph& graph, const string& stage) {
|
|||
UINFO(4, " Avg cost / mtask = " << ((mtaskCount > 0) ? cvtToStr(totalCost / mtaskCount)
|
||||
: "INF!"));
|
||||
UINFO(4, " Histogram of mtask costs:");
|
||||
for (unsigned i = 0; i < 32; ++i) {
|
||||
for (unsigned i = 0; i < scoreBits; ++i) {
|
||||
if (mtaskCostHist[i]) {
|
||||
UINFO(4, " 2^" << i << ": " << mtaskCostHist[i]);
|
||||
V3Stats::addStat("MTask graph, " + stage + ", mtask cost 2^" + (i < 10 ? " " : "")
|
||||
|
@ -2185,8 +2192,8 @@ class Partitioner final {
|
|||
return fanIn + fanOut == 4;
|
||||
}
|
||||
|
||||
uint32_t setupMTaskDeps() VL_MT_DISABLED {
|
||||
uint32_t totalGraphCost = 0;
|
||||
uint64_t setupMTaskDeps() VL_MT_DISABLED {
|
||||
uint64_t totalGraphCost = 0;
|
||||
|
||||
// Artificial single entry point vertex in the MTask graph to allow sibling merges.
|
||||
// This is required as otherwise disjoint sub-graphs could not be merged, but the
|
||||
|
@ -2280,7 +2287,7 @@ class Partitioner final {
|
|||
// OrderMoveVertex. Over time, we'll merge MTasks together and
|
||||
// eventually each MTask will wrap a large number of MTaskMoveVertices
|
||||
// (and the logic nodes therein.)
|
||||
const uint32_t totalGraphCost = setupMTaskDeps();
|
||||
const uint64_t totalGraphCost = setupMTaskDeps();
|
||||
|
||||
debugMTaskGraphStats(*m_mTaskGraphp, "initial");
|
||||
|
||||
|
@ -2328,7 +2335,7 @@ class Partitioner final {
|
|||
// when scheduling them.
|
||||
const unsigned fudgeNumerator = 3;
|
||||
const unsigned fudgeDenominator = 5;
|
||||
const uint32_t cpLimit
|
||||
const uint64_t cpLimit
|
||||
= ((totalGraphCost * fudgeNumerator) / (targetParFactor * fudgeDenominator));
|
||||
UINFO(4, "Partitioner set cpLimit = " << cpLimit);
|
||||
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
// -*- mode: C++; c-file-style: "cc-mode" -*-
|
||||
//*************************************************************************
|
||||
//
|
||||
// Copyright 2025 by Antmicro. This program is free software; you can
|
||||
// redistribute it and/or modify it under the terms of either the GNU
|
||||
// Lesser General Public License Version 3 or the Perl Artistic License
|
||||
// Version 2.0.
|
||||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
//
|
||||
//*************************************************************************
|
||||
|
||||
// Empty C-linkage definition of dpii_call, the function the accompanying
// Verilog test imports through "DPI-C"; it takes no arguments and does nothing.
extern "C" {
void dpii_call() {}
}
|
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vltmt')
|
||||
test.clean_objs()
|
||||
|
||||
test.compile(
|
||||
v_flags2=["t/t_instr_count_dpi_large.cpp"],
|
||||
verilator_flags2=[
|
||||
"--instr-count-dpi 999999999",
|
||||
# Force UNOPTTHREADS error to cause Contraction limit increase beyond UINT32
|
||||
"--threads-max-mtasks 1",
|
||||
"-Wno-UNOPTTHREADS"
|
||||
],
|
||||
threads=2)
|
||||
|
||||
test.execute()
|
||||
|
||||
test.passes()
|
|
@ -0,0 +1,26 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2025 by Wilson Snyder.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
|
||||
module t(clk);
|
||||
input clk;
|
||||
sub_0 sub_0(clk);
|
||||
sub_1 sub_1(clk);
|
||||
initial begin
|
||||
$write("*-* All Finished *-*\n");
|
||||
$finish;
|
||||
end
|
||||
endmodule
|
||||
|
||||
import "DPI-C" context function void dpii_call();
|
||||
|
||||
module sub_0(input clk); /*verilator hier_block*/
|
||||
always @(posedge clk) dpii_call();
|
||||
endmodule
|
||||
|
||||
module sub_1(input clk); /*verilator hier_block*/
|
||||
always @(posedge clk) dpii_call();
|
||||
endmodule
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vltmt')
|
||||
test.clean_objs()
|
||||
test.top_filename = "t/t_instr_count_dpi_large.v"
|
||||
|
||||
test.compile(
|
||||
v_flags2=["t/t_instr_count_dpi_large.cpp"],
|
||||
verilator_flags2=[
|
||||
"--hierarchical",
|
||||
"--instr-count-dpi 999999999",
|
||||
# Force UNOPTTHREADS error to cause Contraction limit increase beyond UINT32
|
||||
"--threads-max-mtasks 1",
|
||||
"-Wno-UNOPTTHREADS"
|
||||
],
|
||||
threads=2)
|
||||
|
||||
test.execute()
|
||||
|
||||
test.passes()
|
Loading…
Reference in New Issue