Rework serial/parallel build mode

Instead of __ALLfast.cpp and __ALLslow.cpp, we now create only a single
__ALL.cpp and compile it with OPT_FAST. This speeds up small builds
where the C compiler does not dominate. A separate patch will follow
that turns VM_PARALLEL_BUILDS on by default at a certain size.

Given this change to the build, there is now no point in emitting both
fast and slow routines into the same .cpp file when --output-split is
not set, as they will just be included in the same __ALL.cpp file. To
keep things simpler and the output easier to comprehend, V3EmitC has
also been changed to always emit the fast and slow files separately.

Also change verilated.mk to apply OPT_SLOW to all slow files, not just
ones called *__Slow.cpp. This change in particular ensures __Syms.cpp
is built as slow.

Part of #2360.
This commit is contained in:
Geza Lore 2020-05-25 11:35:06 +01:00
parent 5bb1da88ed
commit 9d7086067c
11 changed files with 39 additions and 48 deletions

View File

@ -2004,7 +2004,7 @@ depending on the operating system.
# Might be needed if SystemC 2.3.0 # Might be needed if SystemC 2.3.0
export SYSTEMC_CXX_FLAGS=-pthread export SYSTEMC_CXX_FLAGS=-pthread
g++ -L$SYSTEMC_LIBDIR ../sc_main.o Vour__ALL*.o verilated.o \ g++ -L$SYSTEMC_LIBDIR ../sc_main.o Vour__ALL.a verilated.o \
-o Vour -lsystemc -o Vour -lsystemc
And now we run it And now we run it
@ -2187,8 +2187,7 @@ After running Make, the C++ compiler may produce the following:
{mod_prefix}{misc}.o // Intermediate objects {mod_prefix}{misc}.o // Intermediate objects
{prefix} // Final executable (w/--exe argument) {prefix} // Final executable (w/--exe argument)
{prefix}__ALL.a // Library of all Verilated objects {prefix}__ALL.a // Library of all Verilated objects
{prefix}__ALLfast.cpp // Include of hot code for single compile {prefix}__ALL.cpp // Include of all code for single compile
{prefix}__ALLslow.cpp // Include of slow code for single compile
{prefix}{misc}.d // Intermediate dependencies {prefix}{misc}.d // Intermediate dependencies
{prefix}{misc}.o // Intermediate objects {prefix}{misc}.o // Intermediate objects
@ -2772,8 +2771,7 @@ underneath NC:
cd obj_dir cd obj_dir
ncsc_run \ ncsc_run \
sc_main.cpp \ sc_main.cpp \
Vour__ALLfast.cpp \ Vour__ALL.cpp \
Vour__ALLslow.cpp \
verilated.cpp verilated.cpp
For larger designs you'll want to automate this using makefiles, which pull For larger designs you'll want to automate this using makefiles, which pull

View File

@ -93,12 +93,6 @@ LDLIBS += $(VM_USER_LDLIBS)
#OPT_FAST = -O #OPT_FAST = -O
#OPT_FAST = #OPT_FAST =
#######################################################################
##### Aggregates
VM_CLASSES += $(VM_CLASSES_FAST) $(VM_CLASSES_SLOW)
VM_SUPPORT += $(VM_SUPPORT_FAST) $(VM_SUPPORT_SLOW)
####################################################################### #######################################################################
##### SystemC builds ##### SystemC builds
@ -163,35 +157,36 @@ ifneq ($(VK_LIBS_THREADED),0)
endif endif
####################################################################### #######################################################################
##### Stub ### Aggregates
preproc: VM_FAST += $(VM_CLASSES_FAST) $(VM_SUPPORT_FAST)
VM_SLOW += $(VM_CLASSES_SLOW) $(VM_SUPPORT_SLOW)
####################################################################### #######################################################################
# Overall Objects Linking ### Overall Objects Linking
VK_CLASSES_FAST_CPP = $(addsuffix .cpp, $(VM_CLASSES_FAST)) VK_FAST_OBJS = $(addsuffix .o, $(VM_FAST))
VK_CLASSES_SLOW_CPP = $(addsuffix .cpp, $(VM_CLASSES_SLOW)) VK_SLOW_OBJS = $(addsuffix .o, $(VM_SLOW))
VK_SUPPORT_FAST_CPP = $(addsuffix .cpp, $(VM_SUPPORT_FAST))
VK_SUPPORT_SLOW_CPP = $(addsuffix .cpp, $(VM_SUPPORT_SLOW))
VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES)) VK_USER_OBJS = $(addsuffix .o, $(VM_USER_CLASSES))
VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW)) VK_GLOBAL_OBJS = $(addsuffix .o, $(VM_GLOBAL_FAST) $(VM_GLOBAL_SLOW))
ifneq ($(VM_PARALLEL_BUILDS),1) ifneq ($(VM_PARALLEL_BUILDS),1)
# Fast building, all .cpp's in one fell swoop # Fast build for small designs: All .cpp files in one fell swoop. This
# This saves about 5 sec per module, but can be slower if only a little changes # saves total compute, but can be slower if only a little changes. It is
VK_OBJS += $(VM_PREFIX)__ALLfast.o $(VM_PREFIX)__ALLslow.o # also a lot slower for medium to large designs when the speed of the C
all_cpp: $(VM_PREFIX)__ALLfast.cpp $(VM_PREFIX)__ALLslow.cpp # compiler dominates, which in this mode is not parallelizable.
$(VM_PREFIX)__ALLfast.cpp: $(VK_CLASSES_FAST_CPP) $(VK_SUPPORT_FAST_CPP)
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@ VK_OBJS += $(VM_PREFIX)__ALL.o
$(VM_PREFIX)__ALLslow.cpp: $(VK_CLASSES_SLOW_CPP) $(VK_SUPPORT_SLOW_CPP) $(VM_PREFIX)__ALL.cpp: $(addsuffix .cpp, $(VM_FAST) $(VM_SLOW))
$(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@ $(VERILATOR_INCLUDER) -DVL_INCLUDE_OPT=include $^ > $@
all_cpp: $(VM_PREFIX)__ALL.cpp
else else
#Slow way of building... Each .cpp file by itself # Parallel build: Each .cpp file by itself. This can be somewhat slower for
VK_OBJS += $(addsuffix .o, $(VM_CLASSES) $(VM_SUPPORT)) # very small designs and examples, but is a lot faster for large designs.
VK_OBJS += $(VK_FAST_OBJS) $(VK_SLOW_OBJS)
endif endif
$(VM_PREFIX)__ALL.a: $(VK_OBJS) $(VM_PREFIX)__ALL.a: $(VK_OBJS)
@ -202,19 +197,15 @@ $(VM_PREFIX)__ALL.a: $(VK_OBJS)
### Compile rules ### Compile rules
ifneq ($(VM_DEFAULT_RULES),0) ifneq ($(VM_DEFAULT_RULES),0)
$(VM_PREFIX)__ALLfast.o: $(VM_PREFIX)__ALLfast.cpp $(VM_PREFIX)__ALL.o: $(VM_PREFIX)__ALL.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $< $(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
$(VM_PREFIX)__ALLslow.o: $(VM_PREFIX)__ALLslow.cpp # Anything not in $(VK_SLOW_OBJS), including verilated.o use this rule
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $<
# VM_GLOBAL_FAST files including verilated.o use this rule
%.o: %.cpp %.o: %.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $< $(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_FAST) -c -o $@ $<
%__Slow.o: %__Slow.cpp $(VK_SLOW_OBJS): %.o: %.cpp
$(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $< $(OBJCACHE) $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(OPT_SLOW) -c -o $@ $<
endif endif
#Default rule embedded in make: #Default rule embedded in make:

View File

@ -1749,7 +1749,7 @@ public:
m_fast = false; m_fast = false;
} }
virtual ~EmitCImp() {} virtual ~EmitCImp() {}
void mainImp(AstNodeModule* modp, bool slow, bool fast); void mainImp(AstNodeModule* modp, bool slow);
void mainInt(AstNodeModule* modp); void mainInt(AstNodeModule* modp);
void mainDoFunc(AstCFunc* nodep) { iterate(nodep); } void mainDoFunc(AstCFunc* nodep) { iterate(nodep); }
}; };
@ -3264,12 +3264,12 @@ void EmitCImp::mainInt(AstNodeModule* modp) {
VL_DO_CLEAR(delete m_ofp, m_ofp = NULL); VL_DO_CLEAR(delete m_ofp, m_ofp = NULL);
} }
void EmitCImp::mainImp(AstNodeModule* modp, bool slow, bool fast) { void EmitCImp::mainImp(AstNodeModule* modp, bool slow) {
// Output a module // Output a module
AstNodeModule* fileModp = modp; // Filename constructed using this module AstNodeModule* fileModp = modp; // Filename constructed using this module
m_modp = modp; m_modp = modp;
m_slow = slow; m_slow = slow;
m_fast = fast; m_fast = !slow;
UINFO(5, " Emitting " << prefixNameProtect(modp) << endl); UINFO(5, " Emitting " << prefixNameProtect(modp) << endl);
@ -3286,7 +3286,7 @@ void EmitCImp::mainImp(AstNodeModule* modp, bool slow, bool fast) {
m_modp = modp; m_modp = modp;
} }
if (fast && modp->isTop() && v3Global.opt.mtasks()) { if (m_fast && modp->isTop() && v3Global.opt.mtasks()) {
// Make a final pass and emit function definitions for the mtasks // Make a final pass and emit function definitions for the mtasks
// in the ExecGraph // in the ExecGraph
AstExecGraph* execGraphp = v3Global.rootp()->execGraphp(); AstExecGraph* execGraphp = v3Global.rootp()->execGraphp();
@ -3773,12 +3773,8 @@ void V3EmitC::emitc() {
if (VN_IS(nodep, Class)) continue; // Imped with ClassPackage if (VN_IS(nodep, Class)) continue; // Imped with ClassPackage
// clang-format off // clang-format off
{ EmitCImp cint; cint.mainInt(nodep); } { EmitCImp cint; cint.mainInt(nodep); }
if (v3Global.opt.outputSplit()) { { EmitCImp slow; slow.mainImp(nodep, true); }
{ EmitCImp fast; fast.mainImp(nodep, false, true); } { EmitCImp fast; fast.mainImp(nodep, false); }
{ EmitCImp slow; slow.mainImp(nodep, true, false); }
} else {
{ EmitCImp both; both.mainImp(nodep, true, true); }
}
// clang-format on // clang-format on
} }
} }

View File

@ -111,7 +111,7 @@ sub check_gcc_flags {
chomp $line; chomp $line;
print ":log: $line\n" if $Self->{verbose}; print ":log: $line\n" if $Self->{verbose};
if ($line =~ /\.cpp/) { if ($line =~ /\.cpp/) {
my $filetype = ($line =~ /Slow/) ? "slow":"fast"; my $filetype = ($line =~ /Slow|Syms/) ? "slow":"fast";
my $opt = ($line !~ /-O2/) ? "slow":"fast"; my $opt = ($line !~ /-O2/) ? "slow":"fast";
print "$filetype, $opt, $line\n" if $Self->{verbose}; print "$filetype, $opt, $line\n" if $Self->{verbose};
if ($filetype ne $opt) { if ($filetype ne $opt) {

View File

@ -18,7 +18,7 @@ execute(
check_finished => 1, check_finished => 1,
); );
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/VL_RAND_RESET/); file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/VL_RAND_RESET/);
ok(1); ok(1);
1; 1;

View File

@ -18,7 +18,7 @@ execute(
check_finished => 1, check_finished => 1,
); );
file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/VL_RAND_RESET/); file_grep("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/VL_RAND_RESET/);
ok(1); ok(1);
1; 1;

View File

@ -21,11 +21,13 @@ execute(
# We expect all loops should be unrolled by verilator, # We expect all loops should be unrolled by verilator,
# none of the loop variables should exist in the output: # none of the loop variables should exist in the output:
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/index_/); file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/index_/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/index_/);
# Further, we expect that all logic within the loop should # Further, we expect that all logic within the loop should
# have been evaluated inside the compiler. So there should be # have been evaluated inside the compiler. So there should be
# no references to 'sum' in the .cpp. # no references to 'sum' in the .cpp.
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/sum/); file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/sum/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/sum/);
ok(1); ok(1);
1; 1;

View File

@ -18,6 +18,7 @@ execute(
); );
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/); file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/rstn_r/);
ok(1); ok(1);
1; 1;

View File

@ -18,6 +18,7 @@ execute(
); );
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/); file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}.cpp", qr/rstn_r/);
file_grep_not("$Self->{obj_dir}/$Self->{VM_PREFIX}__Slow.cpp", qr/rstn_r/);
ok(1); ok(1);
1; 1;

View File

@ -20,6 +20,7 @@
// Compile in place // Compile in place
#include "Vt_trace_two_b.cpp" #include "Vt_trace_two_b.cpp"
#include "Vt_trace_two_b__Slow.cpp"
#include "Vt_trace_two_b__Syms.cpp" #include "Vt_trace_two_b__Syms.cpp"
#include "Vt_trace_two_b__Trace.cpp" #include "Vt_trace_two_b__Trace.cpp"
#include "Vt_trace_two_b__Trace__Slow.cpp" #include "Vt_trace_two_b__Trace__Slow.cpp"

View File

@ -16,6 +16,7 @@
// Compile in place // Compile in place
#include "Vt_trace_two_b.cpp" #include "Vt_trace_two_b.cpp"
#include "Vt_trace_two_b__Slow.cpp"
#include "Vt_trace_two_b__Syms.cpp" #include "Vt_trace_two_b__Syms.cpp"
#include "Vt_trace_two_b__Trace.cpp" #include "Vt_trace_two_b__Trace.cpp"
#include "Vt_trace_two_b__Trace__Slow.cpp" #include "Vt_trace_two_b__Trace__Slow.cpp"