Rework serial/parallel build mode
Instead of __ALLfast.cpp and __ALLslow.cpp, we now create only a single __ALL.cpp and compile it with OPT_FAST. This speeds up small builds where the C compiler does not dominate. A separate patch will follow turning VM_PARALLEL_BUILDS on by default at a certain size. Given this change to the build, there is now no point in emitting both fast and slow routines into the same .cpp file when --output-split is not set, as they will just be included in the same __ALL.cpp file. To keep things simpler and the output easier to comprehend, V3EmitC has also been changed to always emit the fast and slow files separately. Also change verilated.mk to apply OPT_SLOW to all slow files, not just ones called *__Slow.cpp. This change in particular ensures __Syms.cpp is built as slow. Part of #2360.
This commit is contained in:
parent
5bb1da88ed
commit
9d7086067c
|
@ -2004,7 +2004,7 @@ depending on the operating system.
|
|||
# Might be needed if SystemC 2.3.0
|
||||
export SYSTEMC_CXX_FLAGS=-pthread
|
||||
|
||||
g++ -L$SYSTEMC_LIBDIR ../sc_main.o Vour__ALL*.o verilated.o \
|
||||
g++ -L$SYSTEMC_LIBDIR ../sc_main.o Vour__ALL.a verilated.o \
|
||||
-o Vour -lsystemc
|
||||
|
||||
And now we run it
|
||||
|
@ -2187,8 +2187,7 @@ After running Make, the C++ compiler may produce the following:
|
|||
{mod_prefix}{misc}.o // Intermediate objects
|
||||
{prefix} // Final executable (w/--exe argument)
|
||||
{prefix}__ALL.a // Library of all Verilated objects
|
||||
{prefix}__ALLfast.cpp // Include of hot code for single compile
|
||||
{prefix}__ALLslow.cpp // Include of slow code for single compile
|
||||
{prefix}__ALL.cpp // Include of all code for single compile
|
||||
{prefix}{misc}.d // Intermediate dependencies
|
||||
{prefix}{misc}.o // Intermediate objects
|
||||
|
||||
|
@ -2772,8 +2771,7 @@ underneath NC:
|
|||
cd obj_dir
|
||||
ncsc_run \
|
||||
sc_main.cpp \
|
||||
Vour__ALLfast.cpp \
|
||||
Vour__ALLslow.cpp \
|
||||
Vour__ALL.cpp \
|
||||
verilated.cpp
|
||||
|
||||
For larger designs you'll want to automate this using makefiles, which pull
|
||||
|
|
|
@ -93,12 +93,6 @@ LDLIBS += $(VM_USER_LDLIBS)
|
|||
#OPT_FAST = -O
|
||||
#OPT_FAST =
|
||||
|
||||
#######################################################################
|
||||
##### Aggregates
|
||||
|
||||
VM_CLASSES += $(VM_CLASSES_FAST) $(VM_CLASSES_SLOW)
|
||||
VM_SUPPORT += $(VM_SUPPORT_FAST) $(VM_SUPPORT_SLOW)
|
||||
|
||||
#######################################################################
|
||||
##### SystemC builds
|
||||
|
||||
|
@ -163,35 +157,36 @@ ifneq ($(VK_LIBS_THREADED),0)
|
|||
endif
|
||||
|
||||
#######################################################################
|
||||
##### Stub
|
||||
### Aggregates
|
||||
|
||||
preproc:
|
||||
VM_FAST += $(VM_CLASSES_FAST) $(VM_SUPPORT_FAST)
|
||||
VM_SLOW += $(VM_CLASSES_SLOW) $(VM_SUPPORT_SLOW)
|
||||
|
||||
#######################################################################
|
||||
# Overall Objects Linking
|
||||
### Overall Objects Linking
|
||||
|
||||
VK_CLASSES_FAST_CPP = $(addsuffix .cpp, $(VM_CLASSES_FAST))
|
||||
VK_CLASSES_SLOW_CPP = $(addsuffix .cpp, $(VM_CLASSES_SLOW))
|
||||
|
||||
VK_SUPPORT_FAST_CPP = $(addsuffix .cpp, $(VM_SUPPORT_FAST))
|
||||
VK_SUPPORT_SLOW_CPP = $(addsuffix .cpp, $(VM_SUPPORT_SLOW))
|
||||
VK_FAST_OBJS = $(addsuffix .o, $(VM_FAST))
|
||||
VK_SLOW_OBJS = $(addsuffix .o, $(VM_SLOW))
|
||||
|
||||
VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
|
||||
|
||||
VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW))
|
||||
|
||||
ifneq ($(VM_PARALLEL_BUILDS),1)
|
||||
# Fast building, all .cpp's in one fell swoop
|
||||
# This saves about 5 sec per module, but can be slower if only a little changes
|
||||
VK_OBJS += $(VM_PREFIX)__ALLfast.o $(VM_PREFIX)__ALLslow.o
|
||||
all_cpp: $(VM_PREFIX)__ALLfast.cpp $(VM_PREFIX)__ALLslow.cpp
|
||||
$(VM_PREFIX)__ALLfast.cpp: $(VK_CLASSES_FAST_CPP) $(VK_SUPPORT_FAST_CPP)
|
||||
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@
|
||||
$(VM_PREFIX)__ALLslow.cpp: $(VK_CLASSES_SLOW_CPP) $(VK_SUPPORT_SLOW_CPP)
|
||||
# Fast build for small designs: All .cpp files in one fell swoop. This
|
||||
# saves total compute, but can be slower if only a little changes. It is
|
||||
# also a lot slower for medium to large designs when the speed of the C
|
||||
# compiler dominates, which in this mode is not parallelizable.
|
||||
|
||||
VK_OBJS += $(VM_PREFIX)__ALL.o
|
||||
$(VM_PREFIX)__ALL.cpp: $(addsuffix .cpp, $(VM_FAST) $(VM_SLOW))
|
||||
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@
|
||||
all_cpp: $(VM_PREFIX)__ALL.cpp
|
||||
else
|
||||
#Slow way of building... Each .cpp file by itself
|
||||
VK_OBJS += $(addsuffix .o, $(VM_CLASSES) $(VM_SUPPORT))
|
||||
# Parallel build: Each .cpp file by itself. This can be somewhat slower for
|
||||
# very small designs and examples, but is a lot faster for large designs.
|
||||
|
||||
VK_OBJS += $(VK_FAST_OBJS) $(VK_SLOW_OBJS)
|
||||
endif
|
||||
|
||||
$(VM_PREFIX)__ALL.a: $(VK_OBJS)
|
||||
|
@ -202,19 +197,15 @@ $(VM_PREFIX)__ALL.a: $(VK_OBJS)
|
|||
### Compile rules
|
||||
|
||||
ifneq ($(VM_DEFAULT_RULES),0)
|
||||
$(VM_PREFIX)__ALLfast.o: $(VM_PREFIX)__ALLfast.cpp
|
||||
$(VM_PREFIX)__ALL.o: $(VM_PREFIX)__ALL.cpp
|
||||
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||
|
||||
$(VM_PREFIX)__ALLslow.o: $(VM_PREFIX)__ALLslow.cpp
|
||||
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $<
|
||||
|
||||
# VM_GLOBAL_FAST files including verilated.o use this rule
|
||||
# Anything not in $(VK_SLOW_OBJS), including verilated.o use this rule
|
||||
%.o: %.cpp
|
||||
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
|
||||
|
||||
%__Slow.o: %__Slow.cpp
|
||||
$(VK_SLOW_OBJS): %.o: %.cpp
|
||||
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $<
|
||||
|
||||
endif
|
||||
|
||||
#Default rule embedded in make:
|
||||
|
|
|
@ -1749,7 +1749,7 @@ public:
|
|||
m_fast = false;
|
||||
}
|
||||
virtual ~EmitCImp() {}
|
||||
void mainImp(AstNodeModule* modp, bool slow, bool fast);
|
||||
void mainImp(AstNodeModule* modp, bool slow);
|
||||
void mainInt(AstNodeModule* modp);
|
||||
void mainDoFunc(AstCFunc* nodep) { iterate(nodep); }
|
||||
};
|
||||
|
@ -3264,12 +3264,12 @@ void EmitCImp::mainInt(AstNodeModule* modp) {
|
|||
VL_DO_CLEAR(delete m_ofp, m_ofp = NULL);
|
||||
}
|
||||
|
||||
void EmitCImp::mainImp(AstNodeModule* modp, bool slow, bool fast) {
|
||||
void EmitCImp::mainImp(AstNodeModule* modp, bool slow) {
|
||||
// Output a module
|
||||
AstNodeModule* fileModp = modp; // Filename constructed using this module
|
||||
m_modp = modp;
|
||||
m_slow = slow;
|
||||
m_fast = fast;
|
||||
m_fast = !slow;
|
||||
|
||||
UINFO(5, " Emitting " << prefixNameProtect(modp) << endl);
|
||||
|
||||
|
@ -3286,7 +3286,7 @@ void EmitCImp::mainImp(AstNodeModule* modp, bool slow, bool fast) {
|
|||
m_modp = modp;
|
||||
}
|
||||
|
||||
if (fast && modp->isTop() && v3Global.opt.mtasks()) {
|
||||
if (m_fast && modp->isTop() && v3Global.opt.mtasks()) {
|
||||
// Make a final pass and emit function definitions for the mtasks
|
||||
// in the ExecGraph
|
||||
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
|
||||
|
@ -3773,12 +3773,8 @@ void V3EmitC::emitc() {
|
|||
if (VN_IS(nodep, Class)) continue; // Imped with ClassPackage
|
||||
// clang-format off
|
||||
{ EmitCImp cint; cint.mainInt(nodep); }
|
||||
if (v3Global.opt.outputSplit()) {
|
||||
{ EmitCImp fast; fast.mainImp(nodep, false, true); }
|
||||
{ EmitCImp slow; slow.mainImp(nodep, true, false); }
|
||||
} else {
|
||||
{ EmitCImp both; both.mainImp(nodep, true, true); }
|
||||
}
|
||||
{ EmitCImp slow; slow.mainImp(nodep, true); }
|
||||
{ EmitCImp fast; fast.mainImp(nodep, false); }
|
||||
// clang-format on
|
||||
}
|
||||
}
|
||||
|
|
|
@ -111,7 +111,7 @@ sub check_gcc_flags {
|
|||
chomp $line;
|
||||
print ":log: $line\n" if $Self->{verbose};
|
||||
if ($line =~ /\.cpp/) {
|
||||
my $filetype = ($line =~ /Slow/) ? "slow":"fast";
|
||||
my $filetype = ($line =~ /Slow|Syms/) ? "slow":"fast";
|
||||
my $opt = ($line !~ /-O2/) ? "slow":"fast";
|
||||
print "$filetype, $opt, $line\n" if $Self->{verbose};
|
||||
if ($filetype ne $opt) {
|
||||
|
|
|
@ -18,7 +18,7 @@ execute(
|
|||
check_finished => 1,
|
||||
);
|
||||
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/VL_RAND_RESET/);
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/VL_RAND_RESET/);
|
||||
|
||||
ok(1);
|
||||
1;
|
||||
|
|
|
@ -18,7 +18,7 @@ execute(
|
|||
check_finished => 1,
|
||||
);
|
||||
|
||||
file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/VL_RAND_RESET/);
|
||||
file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/VL_RAND_RESET/);
|
||||
|
||||
ok(1);
|
||||
1;
|
||||
|
|
|
@ -21,11 +21,13 @@ execute(
|
|||
# We expect all loops should be unrolled by verilator,
|
||||
# none of the loop variables should exist in the output:
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/index_/);
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/index_/);
|
||||
|
||||
# Further, we expect that all logic within the loop should
|
||||
# have been evaluated inside the compiler. So there should be
|
||||
# no references to 'sum' in the .cpp.
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/sum/);
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/sum/);
|
||||
|
||||
ok(1);
|
||||
1;
|
||||
|
|
|
@ -18,6 +18,7 @@ execute(
|
|||
);
|
||||
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/);
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/rstn_r/);
|
||||
|
||||
ok(1);
|
||||
1;
|
||||
|
|
|
@ -18,6 +18,7 @@ execute(
|
|||
);
|
||||
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/);
|
||||
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/rstn_r/);
|
||||
|
||||
ok(1);
|
||||
1;
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
|
||||
// Compile in place
|
||||
#include "Vt_trace_two_b.cpp"
|
||||
#include "Vt_trace_two_b__Slow.cpp"
|
||||
#include "Vt_trace_two_b__Syms.cpp"
|
||||
#include "Vt_trace_two_b__Trace.cpp"
|
||||
#include "Vt_trace_two_b__Trace__Slow.cpp"
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
// Compile in place
|
||||
#include "Vt_trace_two_b.cpp"
|
||||
#include "Vt_trace_two_b__Slow.cpp"
|
||||
#include "Vt_trace_two_b__Syms.cpp"
|
||||
#include "Vt_trace_two_b__Trace.cpp"
|
||||
#include "Vt_trace_two_b__Trace__Slow.cpp"
|
||||
|
|
Loading…
Reference in New Issue