Add `--hierarchical-threads` (#6037)
This commit is contained in:
parent
c9fa54536e
commit
9cc4cc0efd
|
@ -383,6 +383,7 @@ detailed descriptions of these arguments.
|
|||
--help Show this help
|
||||
--hierarchical Enable hierarchical Verilation
|
||||
--hierarchical-params-file <name> Internal option that specifies parameters file for hier blocks
|
||||
--hierarchical-threads <threads> Number of threads for hierarchical scheduling
|
||||
-I<dir> Directory to search for includes
|
||||
--if-depth <value> Tune IFDEPTH warning
|
||||
+incdir+<dir> Directory to search for includes
|
||||
|
|
|
@ -777,6 +777,16 @@ Summary:
|
|||
for deparametrized modules with :option:`/*verilator&32;hier_block*/`
|
||||
metacomment. See :ref:`Hierarchical Verilation`.
|
||||
|
||||
.. option:: --hierarchical-threads <threads>
|
||||
|
||||
Specifies the number of threads used for scheduling hierarchical blocks.
|
||||
The main use case of this option is to make it possible to schedule
|
||||
multi-thread hierarchical blocks on multiple threads without increasing
|
||||
parallelism of the whole design.
|
||||
|
||||
Set to :vlopt:`--threads` by default. For optimal performance, this should not exceed the
|
||||
CPU core count.
|
||||
|
||||
.. option:: -I<dir>
|
||||
|
||||
See :vlopt:`-y`.
|
||||
|
|
|
@ -503,8 +503,11 @@ class EmitCModel final : public EmitCFunc {
|
|||
+ "::hierName() const { return vlSymsp->name(); }\n");
|
||||
putns(modp, "const char* " + topClassName() + "::modelName() const { return \""
|
||||
+ topClassName() + "\"; }\n");
|
||||
const int threads = v3Global.opt.hierChild()
|
||||
? v3Global.opt.threads()
|
||||
: std::max(v3Global.opt.threads(), v3Global.opt.hierThreads());
|
||||
putns(modp, "unsigned " + topClassName() + "::threads() const { return "
|
||||
+ cvtToStr(v3Global.opt.threads()) + "; }\n");
|
||||
+ cvtToStr(threads) + "; }\n");
|
||||
putns(modp, "void " + topClassName()
|
||||
+ "::prepareClone() const { contextp()->prepareClone(); }\n");
|
||||
putns(modp, "void " + topClassName() + "::atClone() const {\n");
|
||||
|
|
|
@ -341,13 +341,16 @@ class PackThreads final {
|
|||
|
||||
// MEMBERS
|
||||
const uint32_t m_nThreads; // Number of threads
|
||||
const uint32_t m_nHierThreads; // Number of threads used for hierarchical tasks
|
||||
const uint32_t m_sandbagNumerator; // Numerator padding for est runtime
|
||||
const uint32_t m_sandbagDenom; // Denominator padding for est runtime
|
||||
|
||||
// CONSTRUCTORS
|
||||
explicit PackThreads(uint32_t nThreads = v3Global.opt.threads(),
|
||||
uint32_t nHierThreads = v3Global.opt.hierThreads(),
|
||||
unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100)
|
||||
: m_nThreads{nThreads}
|
||||
, m_nHierThreads{nHierThreads}
|
||||
, m_sandbagNumerator{sandbagNumerator}
|
||||
, m_sandbagDenom{sandbagDenom} {}
|
||||
~PackThreads() = default;
|
||||
|
@ -419,7 +422,7 @@ class PackThreads final {
|
|||
SchedulingMode mode = SchedulingMode::SCHEDULING;
|
||||
|
||||
// Time each thread is occupied until
|
||||
std::vector<uint32_t> busyUntil(m_nThreads, 0);
|
||||
std::vector<uint32_t> busyUntil(std::max(m_nThreads, m_nHierThreads), 0);
|
||||
|
||||
// MTasks ready to be assigned next. All their dependencies are already assigned.
|
||||
std::set<ExecMTask*, MTaskCmp> readyMTasks;
|
||||
|
@ -479,7 +482,7 @@ class PackThreads final {
|
|||
|
||||
if (!bestMtaskp && mode == SchedulingMode::WIDE_TASK_DISCOVERED) {
|
||||
mode = SchedulingMode::WIDE_TASK_SCHEDULING;
|
||||
const uint32_t size = m_nThreads / maxThreadWorkers;
|
||||
const uint32_t size = m_nHierThreads / maxThreadWorkers;
|
||||
UASSERT(size, "Thread pool size should be bigger than 0");
|
||||
// If no tasks were added to the normal thread schedule, clear it.
|
||||
if (schedule.mtaskState.empty()) result.clear();
|
||||
|
@ -584,8 +587,9 @@ public:
|
|||
new V3GraphEdge{&graph, t3, t5, 1};
|
||||
new V3GraphEdge{&graph, t4, t6, 1};
|
||||
|
||||
constexpr uint32_t threads = 6;
|
||||
PackThreads packer{threads,
|
||||
constexpr uint32_t threads = 2;
|
||||
constexpr uint32_t hierThreads = 6;
|
||||
PackThreads packer{threads, hierThreads,
|
||||
3, // Sandbag numerator
|
||||
10}; // Sandbag denom
|
||||
|
||||
|
@ -599,7 +603,7 @@ public:
|
|||
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][0], t0);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[0].threads[0][1], t1);
|
||||
|
||||
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), threads / 3);
|
||||
UASSERT_SELFTEST(size_t, scheduled[1].threads.size(), hierThreads / 3);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][0], t2);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[0][1], t3);
|
||||
UASSERT_SELFTEST(const ExecMTask*, scheduled[1].threads[1][0], t4);
|
||||
|
@ -689,14 +693,15 @@ public:
|
|||
*/
|
||||
new V3GraphEdge{&graph, t0, t1, 1};
|
||||
|
||||
constexpr uint32_t threads = 2;
|
||||
PackThreads packer{threads,
|
||||
constexpr uint32_t threads = 1;
|
||||
constexpr uint32_t hierThreads = 2;
|
||||
PackThreads packer{threads, hierThreads,
|
||||
3, // Sandbag numerator
|
||||
10}; // Sandbag denom
|
||||
|
||||
const std::vector<ThreadSchedule> scheduled = packer.pack(graph);
|
||||
UASSERT_SELFTEST(size_t, scheduled.size(), 2);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), threads / 2);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads.size(), hierThreads / 2);
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[0].size(), 1);
|
||||
for (size_t i = 1; i < scheduled[0].threads.size(); ++i)
|
||||
UASSERT_SELFTEST(size_t, scheduled[0].threads[i].size(), 0);
|
||||
|
|
|
@ -1602,6 +1602,10 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc,
|
|||
m_threads = 1;
|
||||
}
|
||||
});
|
||||
DECL_OPTION("-hierarchical-threads", CbVal, [this, fl](const char* valp) {
|
||||
m_hierThreads = std::atoi(valp);
|
||||
if (m_hierThreads < 0) fl->v3fatal("--hierarchical-threads must be >= 0: " << valp);
|
||||
});
|
||||
DECL_OPTION("-threads-coarsen", OnOff, &m_threadsCoarsen).undocumented(); // Debug
|
||||
DECL_OPTION("-threads-dpi", CbVal, [this, fl](const char* valp) {
|
||||
if (!std::strcmp(valp, "all")) {
|
||||
|
|
|
@ -316,6 +316,7 @@ private:
|
|||
int m_expandLimit = 64; // main switch: --expand-limit
|
||||
int m_gateStmts = 100; // main switch: --gate-stmts
|
||||
int m_hierChild = 0; // main switch: --hierarchical-child
|
||||
int m_hierThreads = 0; // main switch: --hierarchical-threads
|
||||
int m_ifDepth = 0; // main switch: --if-depth
|
||||
int m_inlineMult = 2000; // main switch: --inline-mult
|
||||
int m_instrCountDpi = 200; // main switch: --instr-count-dpi
|
||||
|
@ -741,6 +742,7 @@ public:
|
|||
|
||||
bool hierarchical() const { return m_hierarchical; }
|
||||
int hierChild() const VL_MT_SAFE { return m_hierChild; }
|
||||
int hierThreads() const VL_MT_SAFE { return m_hierThreads == 0 ? m_threads : m_hierThreads; }
|
||||
bool hierTop() const VL_MT_SAFE { return !m_hierChild && !m_hierBlocks.empty(); }
|
||||
const V3HierBlockOptSet& hierBlocks() const { return m_hierBlocks; }
|
||||
// Directory to save .tree, .dot, .dat, .vpp for hierarchical block top
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
%Error: --hierarchical-threads must be >= 0: -2
|
||||
... See the manual at https://verilator.org/verilator_doc.html?v=latest for more assistance.
|
|
@ -0,0 +1,18 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt')
|
||||
|
||||
test.lint(fails=True,
|
||||
verilator_flags2=['--hierarchical-threads -2'],
|
||||
expect_filename=test.golden_filename)
|
||||
|
||||
test.passes()
|
|
@ -0,0 +1,42 @@
|
|||
#!/usr/bin/env python3
|
||||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
|
||||
#
|
||||
# Copyright 2025 by Wilson Snyder. This program is free software; you
|
||||
# can redistribute it and/or modify it under the terms of either the GNU
|
||||
# Lesser General Public License Version 3 or the Perl Artistic License
|
||||
# Version 2.0.
|
||||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
|
||||
|
||||
import vltest_bootstrap
|
||||
|
||||
test.scenarios('vlt_all')
|
||||
test.init_benchmarksim()
|
||||
test.cycles = (int(test.benchmark) if test.benchmark else 100000)
|
||||
test.sim_time = test.cycles * 10 + 1000
|
||||
|
||||
THREADS = 2
|
||||
HIER_BLOCK_THREADS = 2
|
||||
HIER_THREADS = 4
|
||||
|
||||
config_file = test.t_dir + "/" + test.name + ".vlt"
|
||||
|
||||
test.compile(
|
||||
benchmarksim=1,
|
||||
v_flags2=[
|
||||
config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--hierarchical", "--stats",
|
||||
"-Wno-UNOPTFLAT",
|
||||
(f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else ""),
|
||||
(f"--hierarchical-threads {HIER_THREADS}" if test.vltmt and HIER_THREADS > 1 else "")
|
||||
],
|
||||
threads=(THREADS if test.vltmt else 1),
|
||||
context_threads=(max(HIER_THREADS, THREADS) if test.vltmt else 1))
|
||||
|
||||
if test.vltmt:
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 1)
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 2)
|
||||
|
||||
test.execute()
|
||||
|
||||
test.passes()
|
|
@ -0,0 +1,212 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed under the Creative Commons Public Domain, for
|
||||
// any use, without warranty, 2025 by Antmicro.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
// Based on tests emitted by t_gate_tree.py
|
||||
|
||||
module t (clk);
|
||||
input clk;
|
||||
|
||||
logic reset;
|
||||
|
||||
reg [255:0] v2_0;
|
||||
reg [255:0] v1_0;
|
||||
reg [255:0] v1_1;
|
||||
reg [255:0] v1_2;
|
||||
reg [255:0] v1_3;
|
||||
reg [255:0] v1_4;
|
||||
reg [255:0] v1_5;
|
||||
reg [255:0] v1_6;
|
||||
reg [255:0] v1_7;
|
||||
reg [255:0] dummy;
|
||||
|
||||
Calculate calc0(.clk(clk), .reset(reset), .v1_0(v1_0), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||
Calculate calc1(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(v1_1), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||
Calculate calc2(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(v1_2), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||
Calculate calc3(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(v1_3), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||
Calculate calc4(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(v1_4), .v1_5(dummy), .v1_6(dummy), .v1_7(dummy));
|
||||
Calculate calc5(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(v1_5), .v1_6(dummy), .v1_7(dummy));
|
||||
Calculate calc6(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(v1_6), .v1_7(dummy));
|
||||
Calculate calc7(.clk(clk), .reset(reset), .v1_0(dummy), .v1_1(dummy), .v1_2(dummy), .v1_3(dummy), .v1_4(dummy), .v1_5(dummy), .v1_6(dummy), .v1_7(v1_7));
|
||||
always @ (posedge clk) v2_0 <= v1_0 + v1_1 + v1_2 + v1_3 + v1_4 + v1_5 + v1_6 + v1_7;
|
||||
Check chk(.clk(clk), .reset(reset), .v2_0(v2_0));
|
||||
endmodule
|
||||
|
||||
// Test checker: sequences `reset` and verifies the final accumulated value.
//   clk   - clock; all logic runs on its rising edge
//   reset - driven high when cyc==0 and low when cyc==10
//   v2_0  - result under test; compared against 256'd2017 at cycle `SIM_CYCLES
module Check(input clk, output logic reset, input reg [255:0] v2_0);
   integer cyc=0;
   always @ (posedge clk) begin
      cyc <= cyc + 1;
`ifdef TEST_VERBOSE
      // Print only signals declared in this module; v0_*/v1_* are not in scope here.
      $write("[%0t] rst=%0x result=%0x\n", $time, reset, v2_0);
`endif
      if (cyc==0) begin
         reset <= 1;
      end
      else if (cyc==10) begin
         reset <= 0;
      end
      // Default the run length when the test driver did not pass +define+SIM_CYCLES.
`ifndef SIM_CYCLES
 `define SIM_CYCLES 99
`endif
      else if (cyc==`SIM_CYCLES) begin
         // A mismatch stops the simulation before the success banner is printed.
         if (v2_0 != 256'd2017) $stop;
         $write("VARS=64 WIDTH=256 WORKINGSET=2KB\n");
         $write("*-* All Finished *-*\n");
         $finish;
      end
   end

endmodule
|
||||
|
||||
module Calculate(input clk,
|
||||
input reset,
|
||||
output reg [255:0] v1_0,
|
||||
output reg [255:0] v1_1,
|
||||
output reg [255:0] v1_2,
|
||||
output reg [255:0] v1_3,
|
||||
output reg [255:0] v1_4,
|
||||
output reg [255:0] v1_5,
|
||||
output reg [255:0] v1_6,
|
||||
output reg [255:0] v1_7
|
||||
);
|
||||
reg [255:0] v0_0;
|
||||
reg [255:0] v0_1;
|
||||
reg [255:0] v0_2;
|
||||
reg [255:0] v0_3;
|
||||
reg [255:0] v0_4;
|
||||
reg [255:0] v0_5;
|
||||
reg [255:0] v0_6;
|
||||
reg [255:0] v0_7;
|
||||
reg [255:0] v0_8;
|
||||
reg [255:0] v0_9;
|
||||
reg [255:0] v0_10;
|
||||
reg [255:0] v0_11;
|
||||
reg [255:0] v0_12;
|
||||
reg [255:0] v0_13;
|
||||
reg [255:0] v0_14;
|
||||
reg [255:0] v0_15;
|
||||
reg [255:0] v0_16;
|
||||
reg [255:0] v0_17;
|
||||
reg [255:0] v0_18;
|
||||
reg [255:0] v0_19;
|
||||
reg [255:0] v0_20;
|
||||
reg [255:0] v0_21;
|
||||
reg [255:0] v0_22;
|
||||
reg [255:0] v0_23;
|
||||
reg [255:0] v0_24;
|
||||
reg [255:0] v0_25;
|
||||
reg [255:0] v0_26;
|
||||
reg [255:0] v0_27;
|
||||
reg [255:0] v0_28;
|
||||
reg [255:0] v0_29;
|
||||
reg [255:0] v0_30;
|
||||
reg [255:0] v0_31;
|
||||
reg [255:0] v0_32;
|
||||
reg [255:0] v0_33;
|
||||
reg [255:0] v0_34;
|
||||
reg [255:0] v0_35;
|
||||
reg [255:0] v0_36;
|
||||
reg [255:0] v0_37;
|
||||
reg [255:0] v0_38;
|
||||
reg [255:0] v0_39;
|
||||
reg [255:0] v0_40;
|
||||
reg [255:0] v0_41;
|
||||
reg [255:0] v0_42;
|
||||
reg [255:0] v0_43;
|
||||
reg [255:0] v0_44;
|
||||
reg [255:0] v0_45;
|
||||
reg [255:0] v0_46;
|
||||
reg [255:0] v0_47;
|
||||
reg [255:0] v0_48;
|
||||
reg [255:0] v0_49;
|
||||
reg [255:0] v0_50;
|
||||
reg [255:0] v0_51;
|
||||
reg [255:0] v0_52;
|
||||
reg [255:0] v0_53;
|
||||
reg [255:0] v0_54;
|
||||
reg [255:0] v0_55;
|
||||
reg [255:0] v0_56;
|
||||
reg [255:0] v0_57;
|
||||
reg [255:0] v0_58;
|
||||
reg [255:0] v0_59;
|
||||
reg [255:0] v0_60;
|
||||
reg [255:0] v0_61;
|
||||
reg [255:0] v0_62;
|
||||
reg [255:0] v0_63;
|
||||
|
||||
always @ (posedge clk) v0_0 <= reset ? 256'd1 : v0_1;
|
||||
always @ (posedge clk) v0_1 <= reset ? 256'd1 : v0_2;
|
||||
always @ (posedge clk) v0_2 <= reset ? 256'd2 : v0_3;
|
||||
always @ (posedge clk) v0_3 <= reset ? 256'd3 : v0_4;
|
||||
always @ (posedge clk) v0_4 <= reset ? 256'd4 : v0_5;
|
||||
always @ (posedge clk) v0_5 <= reset ? 256'd5 : v0_6;
|
||||
always @ (posedge clk) v0_6 <= reset ? 256'd6 : v0_7;
|
||||
always @ (posedge clk) v0_7 <= reset ? 256'd7 : v0_0;
|
||||
always @ (posedge clk) v0_8 <= reset ? 256'd8 : v0_9;
|
||||
always @ (posedge clk) v0_9 <= reset ? 256'd9 : v0_10;
|
||||
always @ (posedge clk) v0_10 <= reset ? 256'd10 : v0_11;
|
||||
always @ (posedge clk) v0_11 <= reset ? 256'd11 : v0_12;
|
||||
always @ (posedge clk) v0_12 <= reset ? 256'd12 : v0_13;
|
||||
always @ (posedge clk) v0_13 <= reset ? 256'd13 : v0_14;
|
||||
always @ (posedge clk) v0_14 <= reset ? 256'd14 : v0_15;
|
||||
always @ (posedge clk) v0_15 <= reset ? 256'd15 : v0_8;
|
||||
always @ (posedge clk) v0_16 <= reset ? 256'd16 : v0_17;
|
||||
always @ (posedge clk) v0_17 <= reset ? 256'd17 : v0_18;
|
||||
always @ (posedge clk) v0_18 <= reset ? 256'd18 : v0_19;
|
||||
always @ (posedge clk) v0_19 <= reset ? 256'd19 : v0_20;
|
||||
always @ (posedge clk) v0_20 <= reset ? 256'd20 : v0_21;
|
||||
always @ (posedge clk) v0_21 <= reset ? 256'd21 : v0_22;
|
||||
always @ (posedge clk) v0_22 <= reset ? 256'd22 : v0_23;
|
||||
always @ (posedge clk) v0_23 <= reset ? 256'd23 : v0_16;
|
||||
always @ (posedge clk) v0_24 <= reset ? 256'd24 : v0_25;
|
||||
always @ (posedge clk) v0_25 <= reset ? 256'd25 : v0_26;
|
||||
always @ (posedge clk) v0_26 <= reset ? 256'd26 : v0_27;
|
||||
always @ (posedge clk) v0_27 <= reset ? 256'd27 : v0_28;
|
||||
always @ (posedge clk) v0_28 <= reset ? 256'd28 : v0_29;
|
||||
always @ (posedge clk) v0_29 <= reset ? 256'd29 : v0_30;
|
||||
always @ (posedge clk) v0_30 <= reset ? 256'd30 : v0_31;
|
||||
always @ (posedge clk) v0_31 <= reset ? 256'd31 : v0_24;
|
||||
always @ (posedge clk) v0_32 <= reset ? 256'd32 : v0_33;
|
||||
always @ (posedge clk) v0_33 <= reset ? 256'd33 : v0_34;
|
||||
always @ (posedge clk) v0_34 <= reset ? 256'd34 : v0_35;
|
||||
always @ (posedge clk) v0_35 <= reset ? 256'd35 : v0_36;
|
||||
always @ (posedge clk) v0_36 <= reset ? 256'd36 : v0_37;
|
||||
always @ (posedge clk) v0_37 <= reset ? 256'd37 : v0_38;
|
||||
always @ (posedge clk) v0_38 <= reset ? 256'd38 : v0_39;
|
||||
always @ (posedge clk) v0_39 <= reset ? 256'd39 : v0_32;
|
||||
always @ (posedge clk) v0_40 <= reset ? 256'd40 : v0_41;
|
||||
always @ (posedge clk) v0_41 <= reset ? 256'd41 : v0_42;
|
||||
always @ (posedge clk) v0_42 <= reset ? 256'd42 : v0_43;
|
||||
always @ (posedge clk) v0_43 <= reset ? 256'd43 : v0_44;
|
||||
always @ (posedge clk) v0_44 <= reset ? 256'd44 : v0_45;
|
||||
always @ (posedge clk) v0_45 <= reset ? 256'd45 : v0_46;
|
||||
always @ (posedge clk) v0_46 <= reset ? 256'd46 : v0_47;
|
||||
always @ (posedge clk) v0_47 <= reset ? 256'd47 : v0_40;
|
||||
always @ (posedge clk) v0_48 <= reset ? 256'd48 : v0_49;
|
||||
always @ (posedge clk) v0_49 <= reset ? 256'd49 : v0_50;
|
||||
always @ (posedge clk) v0_50 <= reset ? 256'd50 : v0_51;
|
||||
always @ (posedge clk) v0_51 <= reset ? 256'd51 : v0_52;
|
||||
always @ (posedge clk) v0_52 <= reset ? 256'd52 : v0_53;
|
||||
always @ (posedge clk) v0_53 <= reset ? 256'd53 : v0_54;
|
||||
always @ (posedge clk) v0_54 <= reset ? 256'd54 : v0_55;
|
||||
always @ (posedge clk) v0_55 <= reset ? 256'd55 : v0_48;
|
||||
always @ (posedge clk) v0_56 <= reset ? 256'd56 : v0_57;
|
||||
always @ (posedge clk) v0_57 <= reset ? 256'd57 : v0_58;
|
||||
always @ (posedge clk) v0_58 <= reset ? 256'd58 : v0_59;
|
||||
always @ (posedge clk) v0_59 <= reset ? 256'd59 : v0_60;
|
||||
always @ (posedge clk) v0_60 <= reset ? 256'd60 : v0_61;
|
||||
always @ (posedge clk) v0_61 <= reset ? 256'd61 : v0_62;
|
||||
always @ (posedge clk) v0_62 <= reset ? 256'd62 : v0_63;
|
||||
always @ (posedge clk) v0_63 <= reset ? 256'd63 : v0_56;
|
||||
|
||||
always @ (posedge clk) v1_0 <= v0_0 + v0_1 + v0_2 + v0_3 + v0_4 + v0_5 + v0_6 + v0_7;
|
||||
always @ (posedge clk) v1_1 <= v0_8 + v0_9 + v0_10 + v0_11 + v0_12 + v0_13 + v0_14 + v0_15;
|
||||
always @ (posedge clk) v1_2 <= v0_16 + v0_17 + v0_18 + v0_19 + v0_20 + v0_21 + v0_22 + v0_23;
|
||||
always @ (posedge clk) v1_3 <= v0_24 + v0_25 + v0_26 + v0_27 + v0_28 + v0_29 + v0_30 + v0_31;
|
||||
always @ (posedge clk) v1_4 <= v0_32 + v0_33 + v0_34 + v0_35 + v0_36 + v0_37 + v0_38 + v0_39;
|
||||
always @ (posedge clk) v1_5 <= v0_40 + v0_41 + v0_42 + v0_43 + v0_44 + v0_45 + v0_46 + v0_47;
|
||||
always @ (posedge clk) v1_6 <= v0_48 + v0_49 + v0_50 + v0_51 + v0_52 + v0_53 + v0_54 + v0_55;
|
||||
always @ (posedge clk) v1_7 <= v0_56 + v0_57 + v0_58 + v0_59 + v0_60 + v0_61 + v0_62 + v0_63;
|
||||
endmodule
|
|
@ -0,0 +1,14 @@
|
|||
// DESCRIPTION: Verilator: Verilog Test module
|
||||
//
|
||||
// This file ONLY is placed into the Public Domain, for any use,
|
||||
// without warranty, 2025 by Antmicro.
|
||||
// SPDX-License-Identifier: CC0-1.0
|
||||
|
||||
`verilator_config
|
||||
|
||||
hier_block -module "Calculate"
|
||||
hier_block -module "Check"
|
||||
|
||||
`ifdef WORKERS
|
||||
hier_workers -module "Calculate" -workers `WORKERS
|
||||
`endif
|
|
@ -14,9 +14,9 @@ test.init_benchmarksim()
|
|||
test.cycles = (int(test.benchmark) if test.benchmark else 100000)
|
||||
test.sim_time = test.cycles * 10 + 1000
|
||||
|
||||
THREADS = int(os.environ["THREADS"]) if "THREADS" in os.environ else 4
|
||||
HIER_BLOCK_THREADS = int(
|
||||
os.environ["HIER_BLOCK_THREADS"]) if "HIER_BLOCK_THREADS" in os.environ else 2
|
||||
THREADS = 2
|
||||
HIER_BLOCK_THREADS = 2
|
||||
HIER_THREADS = 4
|
||||
|
||||
config_file = test.t_dir + "/" + test.name + ".vlt"
|
||||
|
||||
|
@ -25,18 +25,20 @@ test.compile(
|
|||
v_flags2=[
|
||||
config_file, "+define+SIM_CYCLES=" + str(test.cycles), "--prof-exec", "--hierarchical",
|
||||
"--stats", "-Wno-UNOPTFLAT",
|
||||
(f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else "")
|
||||
(f"-DWORKERS={HIER_BLOCK_THREADS}" if test.vltmt and HIER_BLOCK_THREADS > 1 else ""),
|
||||
(f"--hierarchical-threads {HIER_THREADS}" if test.vltmt and HIER_THREADS > 1 else "")
|
||||
],
|
||||
threads=(THREADS if test.vltmt else 1))
|
||||
threads=(THREADS if test.vltmt else 1),
|
||||
context_threads=(HIER_THREADS if test.vltmt else 1))
|
||||
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Hierarchical DPI wrappers with costs\s+(\d+)', 6)
|
||||
|
||||
if test.vltmt:
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 4)
|
||||
r'Optimizations, Thread schedule count\s+(\d+)', 1)
|
||||
test.file_grep(test.obj_dir + "/V" + test.name + "__hier.dir/V" + test.name + "__stats.txt",
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 12)
|
||||
r'Optimizations, Thread schedule total tasks\s+(\d+)', 2)
|
||||
|
||||
test.execute(all_run_flags=[
|
||||
"+verilator+prof+exec+start+2",
|
||||
|
|
Loading…
Reference in New Issue