Rework serial/parallel build mode

Instead of __ALLfast.cpp and __ALLslow.cpp, we now create only a single
__ALL.cpp and compile it with OPT_FAST. This speeds up small builds
where the C compiler does not dominate. A separate patch will follow
turning VM_PARALLEL_BUILDS on by default at a certain size.

Given this change to the build, there is now no point in emitting both
fast and slow routines into the same .cpp file when --output-split is
not set, as they will just be included in the same __ALL.cpp file. To
keep things simpler and the output easier to comprehend, V3EmitC has
also been changed to always emit the fast and slow files separately.

Also change verilated.mk to apply OPT_SLOW to all slow files, not just
ones called *__Slow.cpp. This change in particular ensures __Syms.cpp
is built as slow.

Part of #2360.
This commit is contained in:
Geza Lore 2020-05-25 11:35:06 +01:00
parent 5bb1da88ed
commit 9d7086067c
11 changed files with 39 additions and 48 deletions

View File

@ -2004,7 +2004,7 @@ depending on the operating system.
# Might be needed if SystemC 2.3.0
export SYSTEMC_CXX_FLAGS=-pthread
g++ -L$SYSTEMC_LIBDIR ../sc_main.o Vour__ALL*.o verilated.o \
g++ -L$SYSTEMC_LIBDIR ../sc_main.o Vour__ALL.a verilated.o \
-o Vour -lsystemc
And now we run it
@ -2187,8 +2187,7 @@ After running Make, the C++ compiler may produce the following:
{mod_prefix}{misc}.o // Intermediate objects
{prefix} // Final executable (w/--exe argument)
{prefix}__ALL.a // Library of all Verilated objects
{prefix}__ALLfast.cpp // Include of hot code for single compile
{prefix}__ALLslow.cpp // Include of slow code for single compile
{prefix}__ALL.cpp // Include of all code for single compile
{prefix}{misc}.d // Intermediate dependencies
{prefix}{misc}.o // Intermediate objects
@ -2772,8 +2771,7 @@ underneath NC:
cd obj_dir
ncsc_run \
sc_main.cpp \
Vour__ALLfast.cpp \
Vour__ALLslow.cpp \
Vour__ALL.cpp \
verilated.cpp
For larger designs you'll want to automate this using makefiles, which pull

View File

@ -93,12 +93,6 @@ LDLIBS += $(VM_USER_LDLIBS)
#OPT_FAST = -O
#OPT_FAST =
#######################################################################
##### Aggregates
VM_CLASSES += $(VM_CLASSES_FAST) $(VM_CLASSES_SLOW)
VM_SUPPORT += $(VM_SUPPORT_FAST) $(VM_SUPPORT_SLOW)
#######################################################################
##### SystemC builds
@ -163,35 +157,36 @@ ifneq ($(VK_LIBS_THREADED),0)
endif
#######################################################################
##### Stub
### Aggregates
preproc:
VM_FAST += $(VM_CLASSES_FAST) $(VM_SUPPORT_FAST)
VM_SLOW += $(VM_CLASSES_SLOW) $(VM_SUPPORT_SLOW)
#######################################################################
# Overall Objects Linking
### Overall Objects Linking
VK_CLASSES_FAST_CPP = $(addsuffix .cpp, $(VM_CLASSES_FAST))
VK_CLASSES_SLOW_CPP = $(addsuffix .cpp, $(VM_CLASSES_SLOW))
VK_SUPPORT_FAST_CPP = $(addsuffix .cpp, $(VM_SUPPORT_FAST))
VK_SUPPORT_SLOW_CPP = $(addsuffix .cpp, $(VM_SUPPORT_SLOW))
VK_FAST_OBJS = $(addsuffix .o, $(VM_FAST))
VK_SLOW_OBJS = $(addsuffix .o, $(VM_SLOW))
VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW))
ifneq ($(VM_PARALLEL_BUILDS),1)
# Fast building, all .cpp's in one fell swoop
# This saves about 5 sec per module, but can be slower if only a little changes
VK_OBJS += $(VM_PREFIX)__ALLfast.o $(VM_PREFIX)__ALLslow.o
all_cpp: $(VM_PREFIX)__ALLfast.cpp $(VM_PREFIX)__ALLslow.cpp
$(VM_PREFIX)__ALLfast.cpp: $(VK_CLASSES_FAST_CPP) $(VK_SUPPORT_FAST_CPP)
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@
$(VM_PREFIX)__ALLslow.cpp: $(VK_CLASSES_SLOW_CPP) $(VK_SUPPORT_SLOW_CPP)
# Fast build for small designs: All .cpp files in one fell swoop. This
# saves total compute, but can be slower if only a little changes. It is
# also a lot slower for medium to large designs when the speed of the C
# compiler dominates, which in this mode is not parallelizable.
VK_OBJS += $(VM_PREFIX)__ALL.o
$(VM_PREFIX)__ALL.cpp: $(addsuffix .cpp, $(VM_FAST) $(VM_SLOW))
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@
all_cpp: $(VM_PREFIX)__ALL.cpp
else
#Slow way of building... Each .cpp file by itself
VK_OBJS += $(addsuffix .o, $(VM_CLASSES) $(VM_SUPPORT))
# Parallel build: Each .cpp file by itself. This can be somewhat slower for
# very small designs and examples, but is a lot faster for large designs.
VK_OBJS += $(VK_FAST_OBJS) $(VK_SLOW_OBJS)
endif
$(VM_PREFIX)__ALL.a: $(VK_OBJS)
@ -202,19 +197,15 @@ $(VM_PREFIX)__ALL.a: $(VK_OBJS)
### Compile rules
ifneq ($(VM_DEFAULT_RULES),0)
$(VM_PREFIX)__ALLfast.o: $(VM_PREFIX)__ALLfast.cpp
$(VM_PREFIX)__ALL.o: $(VM_PREFIX)__ALL.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
$(VM_PREFIX)__ALLslow.o: $(VM_PREFIX)__ALLslow.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $<
# VM_GLOBAL_FAST files including verilated.o use this rule
# Anything not in $(VK_SLOW_OBJS), including verilated.o, uses this rule
%.o: %.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
%__Slow.o: %__Slow.cpp
$(VK_SLOW_OBJS): %.o: %.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $<
endif
#Default rule embedded in make:

View File

@ -1749,7 +1749,7 @@ public:
m_fast = false;
}
virtual ~EmitCImp() {}
void mainImp(AstNodeModule* modp, bool slow, bool fast);
void mainImp(AstNodeModule* modp, bool slow);
void mainInt(AstNodeModule* modp);
void mainDoFunc(AstCFunc* nodep) { iterate(nodep); }
};
@ -3264,12 +3264,12 @@ void EmitCImp::mainInt(AstNodeModule* modp) {
VL_DO_CLEAR(delete m_ofp, m_ofp = NULL);
}
void EmitCImp::mainImp(AstNodeModule* modp, bool slow, bool fast) {
void EmitCImp::mainImp(AstNodeModule* modp, bool slow) {
// Output a module
AstNodeModule* fileModp = modp; // Filename constructed using this module
m_modp = modp;
m_slow = slow;
m_fast = fast;
m_fast = !slow;
UINFO(5, " Emitting " << prefixNameProtect(modp) << endl);
@ -3286,7 +3286,7 @@ void EmitCImp::mainImp(AstNodeModule* modp, bool slow, bool fast) {
m_modp = modp;
}
if (fast && modp->isTop() && v3Global.opt.mtasks()) {
if (m_fast && modp->isTop() && v3Global.opt.mtasks()) {
// Make a final pass and emit function definitions for the mtasks
// in the ExecGraph
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
@ -3773,12 +3773,8 @@ void V3EmitC::emitc() {
if (VN_IS(nodep, Class)) continue; // Imped with ClassPackage
// clang-format off
{ EmitCImp cint; cint.mainInt(nodep); }
if (v3Global.opt.outputSplit()) {
{ EmitCImp fast; fast.mainImp(nodep, false, true); }
{ EmitCImp slow; slow.mainImp(nodep, true, false); }
} else {
{ EmitCImp both; both.mainImp(nodep, true, true); }
}
{ EmitCImp slow; slow.mainImp(nodep, true); }
{ EmitCImp fast; fast.mainImp(nodep, false); }
// clang-format on
}
}

View File

@ -111,7 +111,7 @@ sub check_gcc_flags {
chomp $line;
print ":log: $line\n" if $Self->{verbose};
if ($line =~ /\.cpp/) {
my $filetype = ($line =~ /Slow/) ? "slow":"fast";
my $filetype = ($line =~ /Slow|Syms/) ? "slow":"fast";
my $opt = ($line !~ /-O2/) ? "slow":"fast";
print "$filetype, $opt, $line\n" if $Self->{verbose};
if ($filetype ne $opt) {

View File

@ -18,7 +18,7 @@ execute(
check_finished => 1,
);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/VL_RAND_RESET/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/VL_RAND_RESET/);
ok(1);
1;

View File

@ -18,7 +18,7 @@ execute(
check_finished => 1,
);
file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/VL_RAND_RESET/);
file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/VL_RAND_RESET/);
ok(1);
1;

View File

@ -21,11 +21,13 @@ execute(
# We expect all loops should be unrolled by verilator,
# none of the loop variables should exist in the output:
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/index_/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/index_/);
# Further, we expect that all logic within the loop should
# have been evaluated inside the compiler. So there should be
# no references to 'sum' in the .cpp.
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/sum/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/sum/);
ok(1);
1;

View File

@ -18,6 +18,7 @@ execute(
);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/rstn_r/);
ok(1);
1;

View File

@ -18,6 +18,7 @@ execute(
);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/rstn_r/);
ok(1);
1;

View File

@ -20,6 +20,7 @@
// Compile in place
#include "Vt_trace_two_b.cpp"
#include "Vt_trace_two_b__Slow.cpp"
#include "Vt_trace_two_b__Syms.cpp"
#include "Vt_trace_two_b__Trace.cpp"
#include "Vt_trace_two_b__Trace__Slow.cpp"

View File

@ -16,6 +16,7 @@
// Compile in place
#include "Vt_trace_two_b.cpp"
#include "Vt_trace_two_b__Slow.cpp"
#include "Vt_trace_two_b__Syms.cpp"
#include "Vt_trace_two_b__Trace.cpp"
#include "Vt_trace_two_b__Trace__Slow.cpp"