Merge branch 'master' into develop-v5

This commit is contained in:
Wilson Snyder 2022-06-04 11:58:13 -04:00
commit 0f324c8309
120 changed files with 1997 additions and 1372 deletions

View File

@ -29,7 +29,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-18.04]
os: [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04]
compiler:
- { cc: clang, cxx: clang++ }
- { cc: gcc, cxx: g++ }
@ -37,9 +37,11 @@ jobs:
exclude:
# Build pull requests only with ubuntu-20.04 and without m32
- os: ${{ github.event_name == 'pull_request' && 'ubuntu-18.04' || 'do-not-exclude' }}
- os: ${{ github.event_name == 'pull_request' && 'ubuntu-22.04' || 'do-not-exclude' }}
- m32: ${{ github.event_name == 'pull_request' && 1 || 'do-not-exclude' }}
# Build -m32 only on ubuntu-20.04
- {os: ubuntu-18.04, m32: 1}
- {os: ubuntu-22.04, m32: 1}
include:
# Build GCC 10 on ubuntu-20.04
- os: ubuntu-20.04
@ -95,7 +97,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, ubuntu-18.04]
os: [ubuntu-22.04, ubuntu-20.04, ubuntu-18.04]
compiler:
- { cc: clang, cxx: clang++ }
- { cc: gcc, cxx: g++ }
@ -104,9 +106,11 @@ jobs:
exclude:
# Build pull requests only with ubuntu-20.04 and without m32
- os: ${{ github.event_name == 'pull_request' && 'ubuntu-18.04' || 'do-not-exclude' }}
- os: ${{ github.event_name == 'pull_request' && 'ubuntu-22.04' || 'do-not-exclude' }}
- m32: ${{ github.event_name == 'pull_request' && 1 || 'do-not-exclude' }}
# Build -m32 only on ubuntu-20.04
- {os: ubuntu-18.04, m32: 1}
- {os: ubuntu-22.04, m32: 1}
include:
# Test with GCC 10 on ubuntu-20.04 without m32
- {os: ubuntu-20.04, compiler: { cc: gcc-10, cxx: g++-10 }, m32: 0, suite: dist-vlt-0}
@ -122,7 +126,7 @@ jobs:
CI_M32: ${{ matrix.m32 }}
CC: ${{ matrix.compiler.cc }}
CXX: ${{ matrix.compiler.cxx }}
CACHE_BASE_KEY: test-${{ matrix.os }}-${{ matrix.compiler.cc }}-m32=${{ matrix.m32 }}-${ matrix.suite }}
CACHE_BASE_KEY: test-${{ matrix.os }}-${{ matrix.compiler.cc }}-m32=${{ matrix.m32 }}-${{ matrix.suite }}
CCACHE_MAXSIZE: 64M # Per build matrix entry (2160M in total)
VERILATOR_ARCHIVE: verilator-${{ github.sha }}-${{ matrix.os }}-${{ matrix.compiler.cc }}${{ matrix.m32 && '-m32' || '' }}.tar.gz
steps:

12
Changes
View File

@ -22,12 +22,20 @@ Verilator 5.001 devel
Verilator 4.223 devel
==========================
**Major:**
* VCD tracing is now parallelized with --threads (#3449). [Geza Lore, Shunyao CAD]
**Minor:**
* Add -f<optimization> options to replace -O<letter> options (#3436).
* Changed --no-merge-const-pool to -fno-merge-const-pool (#3436).
* Support compile time trace signal selection with tracing_on/off (#3323). [Shunyao CAD]
* Add assert when VerilatedContext is mis-deleted (#3121). [Rupert Swarbrick]
* Define VM_TRACE_VCD when tracing in VCD format. [Geza Lore, Shunyao CAD]
* Support non-ANSI interface port declarations (#3439). [Geza Lore, Shunyao CAD]
* Support concat assignment to packed array (#3446).
* Improve conditional merging optimization (#3125). [Geza Lore, Shunyao CAD]
* Define VM_TRACE_VCD when tracing in VCD format. [Geza Lore, Shunyao CAD]
* Add assert when VerilatedContext is mis-deleted (#3121). [Rupert Swarbrick]
* Fix hang with large case statement optimization (#3405). [Mike Urbach]
* Fix 'with' operator with type casting (#3387). [xiak95]
* Fix incorrect conditional merging (#3409). [Raynard Qiao]

View File

@ -319,6 +319,7 @@ detailed descriptions of these arguments.
-f <file> Parse arguments from a file
-FI <file> Force include of a file
--flatten Force inlining of all modules, tasks and functions
-fno-<optimization> Disable internal optimization stage
-G<name>=<value> Overwrite top-level parameter
--gdb Run Verilator under GDB interactively
--gdbbt Run Verilator under GDB for backtrace
@ -344,7 +345,6 @@ detailed descriptions of these arguments.
--MMD Create .d dependency files
--MP Create phony dependency targets
--Mdir <directory> Name of output object directory
--no-merge-const-pool Disable merging of different types in const pool
--mod-prefix <topname> Name to prepend to lower classes
--no-clk <signal-name> Prevent marking specified signal as clock
--no-decoration Disable comments and symbol decorations
@ -404,7 +404,7 @@ detailed descriptions of these arguments.
--trace-max-width <width> Maximum array depth for tracing
--trace-params Enable tracing of parameters
--trace-structs Enable tracing structure names
--trace-threads <threads> Enable waveform creation on separate threads
--trace-threads <threads> Enable FST waveform creation on separate threads
--trace-underscore Enable tracing of _signals
-U<var> Undefine preprocessor define
--unroll-count <loops> Tune maximum loop iterations

View File

@ -54,8 +54,12 @@ if [ "$CI_BUILD_STAGE_NAME" = "build" ]; then
if [ "$CI_OS_NAME" = "linux" ]; then
sudo apt-get update
sudo apt-get install libfl-dev libgoogle-perftools-dev ccache
if [ "$CI_RUNS_ON" = "ubuntu-20.04" ]; then
sudo apt-get install libfl-dev ccache
if [ "$CI_RUNS_ON" != "ubuntu-22.04" ]; then
# Some conflict of libunwind version on 22.04, can live without it for now
sudo apt-get install libgoogle-perftools-dev
fi
if [ "$CI_RUNS_ON" = "ubuntu-20.04" ] || [ "$CI_RUNS_ON" = "ubuntu-22.04" ]; then
sudo apt-get install libsystemc libsystemc-dev
fi
if [ "$COVERAGE" = 1 ]; then
@ -85,7 +89,7 @@ elif [ "$CI_BUILD_STAGE_NAME" = "test" ]; then
sudo apt-get update
# libfl-dev needed for internal coverage's test runs
sudo apt-get install gdb gtkwave lcov libfl-dev ccache
if [ "$CI_RUNS_ON" = "ubuntu-20.04" ]; then
if [ "$CI_RUNS_ON" = "ubuntu-20.04" ] || [ "$CI_RUNS_ON" = "ubuntu-22.04" ]; then
sudo apt-get install libsystemc-dev
fi
if [ "$CI_M32" = 1 ]; then

View File

@ -348,14 +348,18 @@ AC_SUBST(CFG_CXXFLAGS_PROFILE)
# Flag to select newest language standard supported
# Macros work such that first option that passes is the one we take
# Currently enabled c++14 due to packaged SystemC dependency
# c++14 is the newest that Verilator is regressed to support
# Currently enable c++17/c++14 due to packaged SystemC dependency
# c++17 is the newest that Verilator is regularly tested to support
# c++11 is the oldest that Verilator supports
# gnu is required for Cygwin to compile verilated.h successfully
#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++20)
#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++20)
#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++17)
#_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++17)
case "$(which lsb_release 2>&1 > /dev/null && lsb_release -d)" in
*Ubuntu*22.04*)
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++17)
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++17)
;;
esac
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++14)
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=c++14)
_MY_CXX_CHECK_SET(CFG_CXXFLAGS_STD_NEWEST,-std=gnu++11)

View File

@ -35,6 +35,7 @@ Guokai Chen
Harald Heckmann
Howard Su
Huang Rui
Huanghuang Zhou
HungMingWu
HyungKi Jeong
Iru Cai

View File

@ -20,6 +20,11 @@ Option `--cdc`
The experimental `--cdc` option is believed to be generally unused and is
planned for removal no sooner than January 2023.
Option `-O<letter>`
The debug `-O<letter>` options have been replaced with
`-fno-<optimization>` debug options to match GCC. The old options are
planned for removal no sooner than June 2023.
Option `--prof-threads`
The `--prof-threads` option has been superseded by the `--prof-exec` and
`--prof-pgo` options and is planned for removal no sooner than April 2023.

View File

@ -428,6 +428,52 @@ Summary:
flattening large designs may require significant CPU time, memory and
storage.
.. option:: -fno-acyc-simp
.. option:: -fno-assemble
.. option:: -fno-case
.. option:: -fno-combine
.. option:: -fno-const
.. option:: -fno-const-bit-op-tree
.. option:: -fno-dedup
.. option:: -fno-expand
.. option:: -fno-gate
.. option:: -fno-inline
.. option:: -fno-life
.. option:: -fno-life-post
.. option:: -fno-localize
.. option:: -fno-merge-cond
.. option:: -fno-merge-const-pool
.. option:: -fno-reloop
.. option:: -fno-reorder
.. option:: -fno-split
.. option:: -fno-subst
.. option:: -fno-subst-const
.. option:: -fno-table
Rarely needed. Disables one of the internal optimization steps. These
are typically used only when recommended by a maintainer to help debug
or work around an issue.
.. option:: -G<name>=<value>
Overwrites the given parameter of the toplevel module. The value is
@ -645,13 +691,6 @@ Summary:
The directory is created if it does not exist and the parent directories
exist; otherwise manually create the Mdir before calling Verilator.
.. option:: --no-merge-const-pool
Rarely needed. In order to minimize cache footprint, values of different
data type, that are yet emitted identically in C++ are merged in the
constant pool. This option disables this and causes every constant pool
entry with a distinct data type to be emitted separately.
.. option:: --mod-prefix <topname>
Specifies the name to prepend to all lower level classes. Defaults to
@ -700,9 +739,9 @@ Summary:
Rarely needed. Enables or disables a specific optimization, with the
optimization selected based on the letter passed. A lowercase letter
disables an optimization, an upper case letter enables it. This is
intended for debugging use only; see the source code for
version-dependent mappings of optimizations to -O letters.
disables an optimization, an upper case letter enables it. This option
is deprecated and the various `-f<optimization>` arguments should be
used instead.
.. option:: -o <executable>
@ -1042,7 +1081,8 @@ Summary:
is not thread safe. With "--threads 1", the generated model is single
threaded but may run in a multithreaded environment. With "--threads N",
where N >= 2, the model is generated to run multithreaded on up to N
threads. See :ref:`Multithreading`.
threads. See :ref:`Multithreading`. This option also applies to
:vlopt:`--trace` (but not :vlopt:`--trace-fst`).
.. option:: --threads-dpi all
@ -1120,7 +1160,8 @@ Summary:
Having tracing compiled in may result in some small performance losses,
even when tracing is not turned on during model execution.
See also :vlopt:`--trace-threads` option.
When using :vlopt:`--threads`, VCD tracing is parallelized, using the
same number of threads as passed to :vlopt:`--threads`.
.. option:: --trace-coverage
@ -1174,12 +1215,12 @@ Summary:
.. option:: --trace-threads *threads*
Enable waveform tracing using separate threads. This is typically faster
in simulation runtime but uses more total compute. This option is
independent of, and works with, both :vlopt:`--trace` and
:vlopt:`--trace-fst`. Different trace formats can take advantage of
more trace threads to varying degrees. Currently VCD tracing can utilize
at most "--trace-threads 1", and FST tracing can utilize at most
"--trace-threads 2". This overrides :vlopt:`--no-threads` .
in simulation runtime but uses more total compute. This option only
applies to :vlopt:`--trace-fst`. FST tracing can utilize at most
"--trace-threads 2". This overrides :vlopt:`--no-threads`.
This option is accepted, but has absolutely no effect with
:vlopt:`--trace`, which respects :vlopt:`--threads` instead.
.. option:: --trace-underscore

View File

@ -72,23 +72,38 @@ a good thing for getting working silicon.
Will Verilator output remain under my own license/copyright?
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
Yes, it's just like using GCC on your programs; this is why Verilator uses
the "GNU **Lesser** Public License Version 3" instead of the more typical
"GNU Public License". See the licenses for details, but in brief, if you
change Verilator itself or the header files Verilator includes, you must
make the source code available under the GNU Lesser Public License.
However, Verilator output (the Verilated code) only "include"s the licensed
files, and so you are **not** required to open-source release any output
from Verilator.
Your SystemVerilog, VPI/DPI, or main() C++ code remains under your own license.
It's just like how using GCC on your programs does not change the copyright
of your program; this is why Verilator uses the "GNU **Lesser** Public
License Version 3" instead of the more typical "GNU Public License". See
the licenses for details.
Some examples:
* Any SystemVerilog or other input fed into Verilator remains your own.
* Any of your VPI/DPI C++ routines that Verilator calls remain your own.
* Any of your main() C++ code that calls into Verilator remain your own.
* If you change Verilator itself, for example changing or adding a file
under the src/ directory in the repository, you must make the source code
available under the GNU Lesser Public License.
* If you change a header Verilator provides, for example under include/ in
the repository, you must make the source code available under the GNU
Lesser Public License.
You also have the option of using the Perl Artistic License, which again
does not require you to release your Verilog or generated code, and also
allows you to modify Verilator for internal use without distributing the
modified version. But please contribute back to the community!
does not require you to release your Verilog, C++, or generated code. This
license also allows you to modify Verilator for internal use without
distributing the modified version. But please contribute back to the
community!
One limit is that you cannot under either license release a closed-source
Verilog simulation product incorporating Verilator. That is you can have a
commercial product, but must make the source code available.
Under both licenses you can offer a commercial product that is based on
Verilator either directly or embedded within. However under both licenses,
any changes you make to Verilator for such a product must be open sourced.
As is standard with Open Source, contributions back to Verilator will be
placed under the Verilator copyright and LGPL/Artistic license. Small test

View File

@ -221,9 +221,13 @@ model, it may be beneficial to performance to adjust the
influences the partitioning of the model by adjusting the assumed execution
time of DPI imports.
The :vlopt:`--trace-threads` options can be used to produce trace dumps
using multiple threads. If :vlopt:`--trace-threads` is set without
:vlopt:`--threads`, then :vlopt:`--trace-threads` will imply
When using :vlopt:`--trace` to perform VCD tracing, the VCD trace
construction is parallelized using the same number of threads as specified
with :vlopt:`--threads`, and is executed on the same thread pool as the model.
The :vlopt:`--trace-threads` options can be used with :vlopt:`--trace-fst`
to offload FST tracing using multiple threads. If :vlopt:`--trace-threads` is
given without :vlopt:`--threads`, then :vlopt:`--trace-threads` will imply
:vlopt:`--threads 1 <--threads>`, i.e.: the support libraries will be
thread safe.
@ -231,12 +235,12 @@ With :vlopt:`--trace-threads 0 <--trace-threads>`, trace dumps are produced
on the main thread. This again gives the highest single thread performance.
With :vlopt:`--trace-threads {N} <--trace-threads>`, where N is at least 1,
N additional threads will be created and managed by the trace files (e.g.:
VerilatedVcdC or VerilatedFstC), to generate the trace dump. The main
thread will be released to proceed with execution as soon as possible,
though some blocking of the main thread is still necessary while capturing
the trace. Different trace formats can utilize a various number of
threads. See the :vlopt:`--trace-threads` option.
up to N additional threads will be created and managed by the trace files
(e.g.: VerilatedFstC), to offload construction of the trace dump. The main
thread will be released to proceed with execution as soon as possible, though
some blocking of the main thread is still necessary while capturing the
trace. FST tracing can utilize up to 2 offload threads, so there is no use
of setting :vlopt:`--trace-threads` higher than 2 at the moment.
When running a multithreaded model, the default Linux task scheduler often
works against the model, by assuming threads are short lived, and thus
@ -441,7 +445,7 @@ SystemC include directories and link to the SystemC libraries.
.. describe:: TRACE_THREADS
Optional. Generated multi-threaded trace dumping, same as
Optional. Generated multi-threaded FST trace dumping, same as
"--trace-threads".
.. describe:: TOP_MODULE

View File

@ -595,7 +595,7 @@ path through the graph is the sum of macro-task execution costs. Sarkar
does almost the same thing, except that he has nonzero estimates for
synchronization costs.
Verilator's cost estimates are assigned by ``InstrCountCostVisitor``. This
Verilator's cost estimates are assigned by ``InstrCountVisitor``. This
class is perhaps the most fragile piece of the multithread
implementation. It's easy to have a bug where you count something cheap
(eg. accessing one element of a huge array) as if it were expensive (eg.

View File

@ -683,6 +683,7 @@ onehot
ooo
oprofile
oversubscription
parallelized
param
parameterized
params
@ -771,6 +772,7 @@ specparam
splitme
spp
sqrt
src
srcdir
srcfile
sscanf
@ -889,6 +891,7 @@ writeme
writemem
writememb
writememh
xiak
xin
xml
xnor

View File

@ -33,5 +33,5 @@ add_executable(example ../make_tracing_c/sim_main.cpp)
# Add the Verilated circuit to the target
verilate(example COVERAGE TRACE
INCLUDE_DIRS "../make_tracing_c"
VERILATOR_ARGS -f ../make_tracing_c/input.vc -Os -x-assign 0
VERILATOR_ARGS -f ../make_tracing_c/input.vc -x-assign fast
SOURCES ../make_tracing_c/top.v)

View File

@ -45,7 +45,7 @@ set_property(
# Add the Verilated circuit to the target
verilate(example SYSTEMC COVERAGE TRACE
INCLUDE_DIRS "../make_tracing_sc"
VERILATOR_ARGS -f ../make_tracing_sc/input.vc -Os -x-assign 0
VERILATOR_ARGS -f ../make_tracing_sc/input.vc -x-assign fast
SOURCES ../make_tracing_sc/top.v)
verilator_link_systemc(example)

View File

@ -33,7 +33,7 @@ VERILATOR_FLAGS =
# Generate C++
VERILATOR_FLAGS += -cc
# Optimize
VERILATOR_FLAGS += -Os -x-assign 0
VERILATOR_FLAGS += -x-assign fast
# Warn about lint issues; may not want this on less solid designs
VERILATOR_FLAGS += -Wall
# This example does not use vl_time_stamp but rather

View File

@ -36,7 +36,7 @@ VERILATOR_FLAGS += -cc --exe
# Generate makefile dependencies (not shown as complicates the Makefile)
#VERILATOR_FLAGS += -MMD
# Optimize
VERILATOR_FLAGS += -Os -x-assign 0
VERILATOR_FLAGS += -x-assign fast
# Warn about lint issues; may not want this on less solid designs
VERILATOR_FLAGS += -Wall
# Make waveforms

View File

@ -37,7 +37,7 @@ VERILATOR_FLAGS += -sc --exe
# Generate makefile dependencies (not shown as complicates the Makefile)
#VERILATOR_FLAGS += -MMD
# Optimize
VERILATOR_FLAGS += -Os -x-assign 0
VERILATOR_FLAGS += -x-assign fast
# Warn about lint issues; may not want this on less solid designs
VERILATOR_FLAGS += -Wall
# Make waveforms

View File

@ -147,7 +147,7 @@ extern uint32_t VL_THREAD_ID() VL_MT_SAFE;
#if VL_THREADED
#define VL_LOCK_SPINS 50000 /// Number of times to spin for a mutex before relaxing
#define VL_LOCK_SPINS 50000 /// Number of times to spin for a mutex before yielding
/// Mutex, wrapped to allow -fthread_safety checks
class VL_CAPABILITY("mutex") VerilatedMutex final {

View File

@ -83,9 +83,11 @@ static_assert(static_cast<int>(FST_ST_VCD_PROGRAM) == static_cast<int>(VLT_TRACE
//=============================================================================
// Specialization of the generics for this trace format
#define VL_DERIVED_T VerilatedFst
#include "verilated_trace_imp.cpp"
#undef VL_DERIVED_T
#define VL_SUB_T VerilatedFst
#define VL_BUF_T VerilatedFstBuffer
#include "verilated_trace_imp.h"
#undef VL_SUB_T
#undef VL_BUF_T
//=============================================================================
// VerilatedFst
@ -111,7 +113,7 @@ void VerilatedFst::open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex) {
m_curScope.clear();
VerilatedTrace<VerilatedFst>::traceInit();
Super::traceInit();
// Clear the scope stack
auto it = m_curScope.begin();
@ -133,14 +135,14 @@ void VerilatedFst::open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex) {
void VerilatedFst::close() VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
VerilatedTrace<VerilatedFst>::closeBase();
Super::closeBase();
fstWriterClose(m_fst);
m_fst = nullptr;
}
void VerilatedFst::flush() VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
VerilatedTrace<VerilatedFst>::flushBase();
Super::flushBase();
fstWriterFlushContext(m_fst);
}
@ -162,7 +164,7 @@ void VerilatedFst::declare(uint32_t code, const char* name, int dtypenum, fstVar
int lsb) {
const int bits = ((msb > lsb) ? (msb - lsb) : (lsb - msb)) + 1;
const bool enabled = VerilatedTrace<VerilatedFst>::declCode(code, name, bits, false);
const bool enabled = Super::declCode(code, name, bits, false);
if (!enabled) return;
std::string nameasstr = namePrefix() + name;
@ -245,18 +247,42 @@ void VerilatedFst::declDouble(uint32_t code, const char* name, int dtypenum, fst
declare(code, name, dtypenum, vardir, vartype, array, arraynum, false, 63, 0);
}
//=============================================================================
// Get/commit trace buffer
VerilatedFstBuffer* VerilatedFst::getTraceBuffer() { return new VerilatedFstBuffer{*this}; }
void VerilatedFst::commitTraceBuffer(VerilatedFstBuffer* bufp) {
#ifdef VL_TRACE_OFFLOAD
if (bufp->m_offloadBufferWritep) {
m_offloadBufferWritep = bufp->m_offloadBufferWritep;
return; // Buffer will be deleted by the offload thread
}
#endif
delete bufp;
}
//=============================================================================
// VerilatedFstBuffer implementation
VerilatedFstBuffer::VerilatedFstBuffer(VerilatedFst& owner)
: VerilatedTraceBuffer<VerilatedFst, VerilatedFstBuffer>{owner} {}
//=============================================================================
// Trace rendering primitives
// Note: emit* are only ever called from one place (full* in
// verilated_trace_imp.cpp, which is included in this file at the top),
// verilated_trace_imp.h, which is included in this file at the top),
// so always inline them.
VL_ATTR_ALWINLINE
void VerilatedFst::emitBit(uint32_t code, CData newval) {
void VerilatedFstBuffer::emitBit(uint32_t code, CData newval) {
VL_DEBUG_IFDEF(assert(m_symbolp[code]););
fstWriterEmitValueChange(m_fst, m_symbolp[code], newval ? "1" : "0");
}
VL_ATTR_ALWINLINE
void VerilatedFst::emitCData(uint32_t code, CData newval, int bits) {
void VerilatedFstBuffer::emitCData(uint32_t code, CData newval, int bits) {
char buf[VL_BYTESIZE];
VL_DEBUG_IFDEF(assert(m_symbolp[code]););
cvtCDataToStr(buf, newval << (VL_BYTESIZE - bits));
@ -264,7 +290,7 @@ void VerilatedFst::emitCData(uint32_t code, CData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedFst::emitSData(uint32_t code, SData newval, int bits) {
void VerilatedFstBuffer::emitSData(uint32_t code, SData newval, int bits) {
char buf[VL_SHORTSIZE];
VL_DEBUG_IFDEF(assert(m_symbolp[code]););
cvtSDataToStr(buf, newval << (VL_SHORTSIZE - bits));
@ -272,7 +298,7 @@ void VerilatedFst::emitSData(uint32_t code, SData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedFst::emitIData(uint32_t code, IData newval, int bits) {
void VerilatedFstBuffer::emitIData(uint32_t code, IData newval, int bits) {
char buf[VL_IDATASIZE];
VL_DEBUG_IFDEF(assert(m_symbolp[code]););
cvtIDataToStr(buf, newval << (VL_IDATASIZE - bits));
@ -280,7 +306,7 @@ void VerilatedFst::emitIData(uint32_t code, IData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedFst::emitQData(uint32_t code, QData newval, int bits) {
void VerilatedFstBuffer::emitQData(uint32_t code, QData newval, int bits) {
char buf[VL_QUADSIZE];
VL_DEBUG_IFDEF(assert(m_symbolp[code]););
cvtQDataToStr(buf, newval << (VL_QUADSIZE - bits));
@ -288,7 +314,7 @@ void VerilatedFst::emitQData(uint32_t code, QData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedFst::emitWData(uint32_t code, const WData* newvalp, int bits) {
void VerilatedFstBuffer::emitWData(uint32_t code, const WData* newvalp, int bits) {
int words = VL_WORDS_I(bits);
char* wp = m_strbuf;
// Convert the most significant word
@ -304,6 +330,6 @@ void VerilatedFst::emitWData(uint32_t code, const WData* newvalp, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedFst::emitDouble(uint32_t code, double newval) {
void VerilatedFstBuffer::emitDouble(uint32_t code, double newval) {
fstWriterEmitValueChange(m_fst, m_symbolp[code], &newval);
}

View File

@ -31,15 +31,19 @@
#include <string>
#include <vector>
class VerilatedFstBuffer;
//=============================================================================
// VerilatedFst
// Base class to create a Verilator FST dump
// This is an internally used class - see VerilatedFstC for what to call from applications
class VerilatedFst final : public VerilatedTrace<VerilatedFst> {
class VerilatedFst final : public VerilatedTrace<VerilatedFst, VerilatedFstBuffer> {
public:
using Super = VerilatedTrace<VerilatedFst, VerilatedFstBuffer>;
private:
// Give the superclass access to private bits (to avoid virtual functions)
friend class VerilatedTrace<VerilatedFst>;
friend Buffer; // Give the buffer access to the private bits
//=========================================================================
// FST specific internals
@ -60,31 +64,26 @@ protected:
//=========================================================================
// Implementation of VerilatedTrace interface
// Implementations of protected virtual methods for VerilatedTrace
// Called when the trace moves forward to a new time point
virtual void emitTimeChange(uint64_t timeui) override;
// Hooks called from VerilatedTrace
virtual bool preFullDump() override { return isOpen(); }
virtual bool preChangeDump() override { return isOpen(); }
// Implementations of duck-typed methods for VerilatedTrace. These are
// called from only one place (namely full*) so always inline them.
inline void emitBit(uint32_t code, CData newval);
inline void emitCData(uint32_t code, CData newval, int bits);
inline void emitSData(uint32_t code, SData newval, int bits);
inline void emitIData(uint32_t code, IData newval, int bits);
inline void emitQData(uint32_t code, QData newval, int bits);
inline void emitWData(uint32_t code, const WData* newvalp, int bits);
inline void emitDouble(uint32_t code, double newval);
// Trace buffer management
virtual VerilatedFstBuffer* getTraceBuffer() override;
virtual void commitTraceBuffer(VerilatedFstBuffer*) override;
public:
//=========================================================================
// External interface to client code
// (All must be threadsafe)
// CONSTRUCTOR
explicit VerilatedFst(void* fst = nullptr);
~VerilatedFst();
// METHODS - All must be thread safe
// Open the file; call isOpen() to see if errors
void open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex);
// Close the file
@ -97,11 +96,6 @@ public:
//=========================================================================
// Internal interface to Verilator generated code
// Inside dumping routines, declare a data type
void declDTypeEnum(int dtypenum, const char* name, uint32_t elements, unsigned int minValbits,
const char** itemNamesp, const char** itemValuesp);
// Inside dumping routines, declare a signal
void declBit(uint32_t code, const char* name, int dtypenum, fstVarDir vardir,
fstVarType vartype, bool array, int arraynum);
void declBus(uint32_t code, const char* name, int dtypenum, fstVarDir vardir,
@ -112,18 +106,55 @@ public:
fstVarType vartype, bool array, int arraynum, int msb, int lsb);
void declDouble(uint32_t code, const char* name, int dtypenum, fstVarDir vardir,
fstVarType vartype, bool array, int arraynum);
void declDTypeEnum(int dtypenum, const char* name, uint32_t elements, unsigned int minValbits,
const char** itemNamesp, const char** itemValuesp);
};
#ifndef DOXYGEN
// Declare specialization here as it's used in VerilatedFstC just below
template <> void VerilatedTrace<VerilatedFst>::dump(uint64_t timeui);
template <> void VerilatedTrace<VerilatedFst>::set_time_unit(const char* unitp);
template <> void VerilatedTrace<VerilatedFst>::set_time_unit(const std::string& unit);
template <> void VerilatedTrace<VerilatedFst>::set_time_resolution(const char* unitp);
template <> void VerilatedTrace<VerilatedFst>::set_time_resolution(const std::string& unit);
template <> void VerilatedTrace<VerilatedFst>::dumpvars(int level, const std::string& hier);
template <> void VerilatedFst::Super::dump(uint64_t time);
template <> void VerilatedFst::Super::set_time_unit(const char* unitp);
template <> void VerilatedFst::Super::set_time_unit(const std::string& unit);
template <> void VerilatedFst::Super::set_time_resolution(const char* unitp);
template <> void VerilatedFst::Super::set_time_resolution(const std::string& unit);
template <> void VerilatedFst::Super::dumpvars(int level, const std::string& hier);
#endif
//=============================================================================
// VerilatedFstBuffer
class VerilatedFstBuffer final : public VerilatedTraceBuffer<VerilatedFst, VerilatedFstBuffer> {
// Give the trace file access to the private bits
friend VerilatedFst;
friend VerilatedFst::Super;
// The FST file handle
void* const m_fst = m_owner.m_fst;
// code to fstHande map, as an array
const fstHandle* const m_symbolp = m_owner.m_symbolp;
// String buffer long enough to hold maxBits() chars
char* const m_strbuf = m_owner.m_strbuf;
public:
// CONSTRUCTOR
explicit VerilatedFstBuffer(VerilatedFst& owner);
~VerilatedFstBuffer() = default;
//=========================================================================
// Implementation of VerilatedTraceBuffer interface
// Implementations of duck-typed methods for VerilatedTraceBuffer. These are
// called from only one place (the full* methods), so always inline them.
VL_ATTR_ALWINLINE inline void emitBit(uint32_t code, CData newval);
VL_ATTR_ALWINLINE inline void emitCData(uint32_t code, CData newval, int bits);
VL_ATTR_ALWINLINE inline void emitSData(uint32_t code, SData newval, int bits);
VL_ATTR_ALWINLINE inline void emitIData(uint32_t code, IData newval, int bits);
VL_ATTR_ALWINLINE inline void emitQData(uint32_t code, QData newval, int bits);
VL_ATTR_ALWINLINE inline void emitWData(uint32_t code, const WData* newvalp, int bits);
VL_ATTR_ALWINLINE inline void emitDouble(uint32_t code, double newval);
};
//=============================================================================
// VerilatedFstC
/// Create a FST dump file in C standalone (no SystemC) simulations.

View File

@ -60,7 +60,7 @@ uint16_t VlExecutionRecord::getcpu() {
//=============================================================================
// VlExecutionProfiler implementation
template <size_t N> size_t roundUptoMultipleOf(size_t value) {
template <size_t N> static size_t roundUptoMultipleOf(size_t value) {
static_assert((N & (N - 1)) == 0, "'N' must be a power of 2");
size_t mask = N - 1;
return (value + mask) & ~mask;

View File

@ -22,28 +22,43 @@
#ifndef VERILATOR_VERILATED_TRACE_H_
#define VERILATOR_VERILATED_TRACE_H_
#ifdef VL_TRACE_THREADED
#define VL_TRACE_OFFLOAD
// clang-format off
// In FST mode, VL_TRACE_THREADED enables offloading, but only if we also have
// the FST writer thread. This means with --trace-threads 1, we get the FST
// writer thread only, and with --trace-threads 2 we get offloading as well
#if defined(VL_TRACE_FST_WRITER_THREAD) && defined(VL_TRACE_THREADED)
# define VL_TRACE_OFFLOAD
#endif
// VCD tracing can happen fully in parallel
#if defined(VM_TRACE_VCD) && VM_TRACE_VCD && defined(VL_TRACE_THREADED)
# define VL_TRACE_PARALLEL
#endif
// clang-format off
#if defined(VL_TRACE_PARALLEL) && defined(VL_TRACE_OFFLOAD)
# error "Cannot have VL_TRACE_PARALLEL and VL_TRACE_OFFLOAD together"
#endif
#include "verilated.h"
#include "verilated_trace_defs.h"
#include <bitset>
#include <condition_variable>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#ifdef VL_TRACE_OFFLOAD
# include <condition_variable>
# include <deque>
# include <thread>
#endif
// clang-format on
class VlThreadPool;
template <class T_Trace, class T_Buffer> class VerilatedTraceBuffer;
#ifdef VL_TRACE_OFFLOAD
//=============================================================================
// Offloaded tracing
@ -106,7 +121,8 @@ public:
CHG_WDATA = 0x6,
CHG_DOUBLE = 0x8,
// TODO: full..
TIME_CHANGE = 0xd,
TIME_CHANGE = 0xc,
TRACE_BUFFER = 0xd,
END = 0xe, // End of buffer
SHUTDOWN = 0xf // Shutdown worker thread, also marks end of buffer
};
@ -116,16 +132,22 @@ public:
//=============================================================================
// VerilatedTrace
// VerilatedTrace uses F-bounded polymorphism to access duck-typed
// implementations in the format specific derived class, which must be passed
// as the type parameter T_Derived
template <class T_Derived> class VerilatedTrace VL_NOT_FINAL {
// T_Trace is the format specific subclass of VerilatedTrace.
// T_Buffer is the format specific subclass of VerilatedTraceBuffer.
template <class T_Trace, class T_Buffer> class VerilatedTrace VL_NOT_FINAL {
// Give the buffer (both base and derived) access to the private bits
friend VerilatedTraceBuffer<T_Trace, T_Buffer>;
friend T_Buffer;
public:
using Buffer = T_Buffer;
//=========================================================================
// Generic tracing internals
using initCb_t = void (*)(void*, T_Derived*, uint32_t); // Type of init callbacks
using dumpCb_t = void (*)(void*, T_Derived*); // Type of all but init callbacks
using initCb_t = void (*)(void*, T_Trace*, uint32_t); // Type of init callbacks
using dumpCb_t = void (*)(void*, Buffer*); // Type of dump callbacks
using cleanupCb_t = void (*)(void*, T_Trace*); // Type of cleanup callbacks
private:
struct CallbackRecord {
@ -133,9 +155,10 @@ private:
// (the one in Ubuntu 14.04 with GCC 4.8.4 in particular) use the
// assignment operator on inserting into collections, so they don't work
// with const fields...
union {
initCb_t m_initCb; // The callback function
dumpCb_t m_dumpCb; // The callback function
union { // The callback
initCb_t m_initCb;
dumpCb_t m_dumpCb;
cleanupCb_t m_cleanupCb;
};
void* m_userp; // The user pointer to pass to the callback (the symbol table)
CallbackRecord(initCb_t cb, void* userp)
@ -144,32 +167,66 @@ private:
CallbackRecord(dumpCb_t cb, void* userp)
: m_dumpCb{cb}
, m_userp{userp} {}
CallbackRecord(cleanupCb_t cb, void* userp)
: m_cleanupCb{cb}
, m_userp{userp} {}
};
uint32_t* m_sigs_oldvalp; // Old value store
EData* m_sigs_enabledp; // Bit vector of enabled codes (nullptr = all on)
uint64_t m_timeLastDump; // Last time we did a dump
#ifdef VL_TRACE_PARALLEL
struct ParallelWorkerData {
const dumpCb_t m_cb; // The callback
void* const m_userp; // The user pointer to pass to the callback
Buffer* const m_bufp; // The buffer pointer to pass to the callback
std::atomic<bool> m_ready{false}; // The ready flag
mutable VerilatedMutex m_mutex; // Mutex for suspension until ready
std::condition_variable_any m_cv; // Condition variable for suspension
bool m_waiting VL_GUARDED_BY(m_mutex) = false; // Whether a thread is suspended in wait()
void wait();
ParallelWorkerData(dumpCb_t cb, void* userp, Buffer* bufp)
: m_cb{cb}
, m_userp{userp}
, m_bufp{bufp} {}
};
// Passed a ParallelWorkerData*, second argument is ignored
static void parallelWorkerTask(void*, bool);
#endif
using ParallelCallbackMap = std::unordered_map<VlThreadPool*, std::vector<CallbackRecord>>;
protected:
uint32_t* m_sigs_oldvalp = nullptr; // Previous value store
EData* m_sigs_enabledp = nullptr; // Bit vector of enabled codes (nullptr = all on)
private:
uint64_t m_timeLastDump = 0; // Last time we did a dump
std::vector<bool> m_sigs_enabledVec; // Staging for m_sigs_enabledp
std::vector<CallbackRecord> m_initCbs; // Routines to initialize traciong
std::vector<CallbackRecord> m_fullCbs; // Routines to perform full dump
std::vector<CallbackRecord> m_chgCbs; // Routines to perform incremental dump
std::vector<CallbackRecord> m_initCbs; // Routines to initialize tracing
ParallelCallbackMap m_fullCbs; // Routines to perform full dump
ParallelCallbackMap m_chgCbs; // Routines to perform incremental dump
std::vector<CallbackRecord> m_cleanupCbs; // Routines to call at the end of dump
bool m_fullDump; // Whether a full dump is required on the next call to 'dump'
uint32_t m_nextCode; // Next code number to assign
uint32_t m_numSignals; // Number of distinct signals
uint32_t m_maxBits; // Number of bits in the widest signal
std::vector<VlThreadPool*> m_threadPoolps; // All thread pools, in insertion order
bool m_fullDump = true; // Whether a full dump is required on the next call to 'dump'
uint32_t m_nextCode = 0; // Next code number to assign
uint32_t m_numSignals = 0; // Number of distinct signals
uint32_t m_maxBits = 0; // Number of bits in the widest signal
std::vector<std::string> m_namePrefixStack{""}; // Path prefixes to add to signal names
std::vector<std::pair<int, std::string>> m_dumpvars; // dumpvar() entries
char m_scopeEscape;
double m_timeRes; // Time resolution (ns/ms etc)
double m_timeUnit; // Time units (ns/ms etc)
char m_scopeEscape = '.';
double m_timeRes = 1e-9; // Time resolution (ns/ms etc)
double m_timeUnit = 1e-0; // Time units (ns/ms etc)
void addThreadPool(VlThreadPool* threadPoolp) VL_MT_SAFE_EXCLUDES(m_mutex);
void addCallbackRecord(std::vector<CallbackRecord>& cbVec, CallbackRecord& cbRec)
VL_MT_SAFE_EXCLUDES(m_mutex);
// Equivalent to 'this' but is of the sub-type 'T_Derived*'. Use 'self()->'
// Equivalent to 'this' but is of the sub-type 'T_Trace*'. Use 'self()->'
// to access duck-typed functions to avoid a virtual function call.
T_Derived* self() { return static_cast<T_Derived*>(this); }
T_Trace* self() { return static_cast<T_Trace*>(this); }
void runParallelCallbacks(const ParallelCallbackMap& cbMap);
// Flush any remaining data for this file
static void onFlush(void* selfp) VL_MT_UNSAFE_ONE;
@ -178,17 +235,21 @@ private:
#ifdef VL_TRACE_OFFLOAD
// Number of total offload buffers that have been allocated
uint32_t m_numOffloadBuffers;
uint32_t m_numOffloadBuffers = 0;
// Size of offload buffers
size_t m_offloadBufferSize;
size_t m_offloadBufferSize = 0;
// Buffers handed to worker for processing
VerilatedThreadQueue<uint32_t*> m_offloadBuffersToWorker;
// Buffers returned from worker after processing
VerilatedThreadQueue<uint32_t*> m_offloadBuffersFromWorker;
protected:
// Write pointer into current buffer
uint32_t* m_offloadBufferWritep;
uint32_t* m_offloadBufferWritep = nullptr;
// End of offload buffer
uint32_t* m_offloadBufferEndp;
uint32_t* m_offloadBufferEndp = nullptr;
private:
// The offload worker thread itself
std::unique_ptr<std::thread> m_workerThread;
@ -250,6 +311,10 @@ protected:
virtual bool preFullDump() = 0;
virtual bool preChangeDump() = 0;
// Trace buffer management
virtual Buffer* getTraceBuffer() = 0;
virtual void commitTraceBuffer(Buffer*) = 0;
public:
//=========================================================================
// External interface to client code
@ -270,19 +335,55 @@ public:
// Call
void dump(uint64_t timeui) VL_MT_SAFE_EXCLUDES(m_mutex);
//=========================================================================
// Internal interface to Verilator generated code
//=========================================================================
// Non-hot path internal interface to Verilator generated code
void addInitCb(initCb_t cb, void* userp) VL_MT_SAFE;
void addFullCb(dumpCb_t cb, void* userp) VL_MT_SAFE;
void addChgCb(dumpCb_t cb, void* userp) VL_MT_SAFE;
void addCleanupCb(dumpCb_t cb, void* userp) VL_MT_SAFE;
void addFullCb(dumpCb_t cb, void* userp, VlThreadPool* = nullptr) VL_MT_SAFE;
void addChgCb(dumpCb_t cb, void* userp, VlThreadPool* = nullptr) VL_MT_SAFE;
void addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE;
void scopeEscape(char flag) { m_scopeEscape = flag; }
void pushNamePrefix(const std::string&);
void popNamePrefix(unsigned count = 1);
};
//=============================================================================
// VerilatedTraceBuffer
// T_Trace is the format specific subclass of VerilatedTrace.
// T_Buffer is the format specific subclass of VerilatedTraceBuffer.
// The format-specific hot-path methods use duck-typing via T_Buffer for performance.
template <class T_Trace, class T_Buffer> class VerilatedTraceBuffer VL_NOT_FINAL {
friend T_Trace; // Give the trace file access to the private bits
protected:
T_Trace& m_owner; // The VerilatedTrace subclass that owns this buffer
// Previous value store
uint32_t* const m_sigs_oldvalp = m_owner.m_sigs_oldvalp;
// Bit vector of enabled codes (nullptr = all on)
EData* const m_sigs_enabledp = m_owner.m_sigs_enabledp;
#ifdef VL_TRACE_OFFLOAD
// Write pointer into current buffer
uint32_t* m_offloadBufferWritep = m_owner.m_offloadBufferWritep;
// End of offload buffer
uint32_t* const m_offloadBufferEndp = m_owner.m_offloadBufferEndp;
#endif
// Equivalent to 'this' but is of the sub-type 'T_Buffer*'. Use 'self()->'
// to access duck-typed functions to avoid a virtual function call.
inline T_Buffer* self() { return static_cast<T_Buffer*>(this); }
explicit VerilatedTraceBuffer(T_Trace& owner);
virtual ~VerilatedTraceBuffer() = default;
public:
//=========================================================================
// Hot path internal interface to Verilator generated code
@ -300,7 +401,7 @@ public:
// duck-typed void emitWData(uint32_t code, const WData* newvalp, int bits) = 0;
// duck-typed void emitDouble(uint32_t code, double newval) = 0;
uint32_t* oldp(uint32_t code) { return m_sigs_oldvalp + code; }
VL_ATTR_ALWINLINE inline uint32_t* oldp(uint32_t code) { return m_sigs_oldvalp + code; }
// Write to previous value buffer value and emit trace entry.
void fullBit(uint32_t* oldp, CData newval);
@ -363,9 +464,13 @@ public:
VL_DEBUG_IF(assert(m_offloadBufferWritep <= m_offloadBufferEndp););
}
#define CHG(name) chg##name##Impl
#else
#define CHG(name) chg##name
#define chgBit chgBitImpl
#define chgCData chgCDataImpl
#define chgSData chgSDataImpl
#define chgIData chgIDataImpl
#define chgQData chgQDataImpl
#define chgWData chgWDataImpl
#define chgDouble chgDoubleImpl
#endif
// In non-offload mode, these are called directly by the trace callbacks,
@ -373,27 +478,27 @@ public:
// thread and are called chg*Impl
// Check previous dumped value of signal. If changed, then emit trace entry
inline void CHG(Bit)(uint32_t* oldp, CData newval) {
VL_ATTR_ALWINLINE inline void chgBit(uint32_t* oldp, CData newval) {
const uint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullBit(oldp, newval);
}
inline void CHG(CData)(uint32_t* oldp, CData newval, int bits) {
VL_ATTR_ALWINLINE inline void chgCData(uint32_t* oldp, CData newval, int bits) {
const uint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullCData(oldp, newval, bits);
}
inline void CHG(SData)(uint32_t* oldp, SData newval, int bits) {
VL_ATTR_ALWINLINE inline void chgSData(uint32_t* oldp, SData newval, int bits) {
const uint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullSData(oldp, newval, bits);
}
inline void CHG(IData)(uint32_t* oldp, IData newval, int bits) {
VL_ATTR_ALWINLINE inline void chgIData(uint32_t* oldp, IData newval, int bits) {
const uint32_t diff = *oldp ^ newval;
if (VL_UNLIKELY(diff)) fullIData(oldp, newval, bits);
}
inline void CHG(QData)(uint32_t* oldp, QData newval, int bits) {
VL_ATTR_ALWINLINE inline void chgQData(uint32_t* oldp, QData newval, int bits) {
const uint64_t diff = *reinterpret_cast<QData*>(oldp) ^ newval;
if (VL_UNLIKELY(diff)) fullQData(oldp, newval, bits);
}
inline void CHG(WData)(uint32_t* oldp, const WData* newvalp, int bits) {
VL_ATTR_ALWINLINE inline void chgWData(uint32_t* oldp, const WData* newvalp, int bits) {
for (int i = 0; i < (bits + 31) / 32; ++i) {
if (VL_UNLIKELY(oldp[i] ^ newvalp[i])) {
fullWData(oldp, newvalp, bits);
@ -401,11 +506,20 @@ public:
}
}
}
inline void CHG(Double)(uint32_t* oldp, double newval) {
VL_ATTR_ALWINLINE inline void chgDouble(uint32_t* oldp, double newval) {
// cppcheck-suppress invalidPointerCast
if (VL_UNLIKELY(*reinterpret_cast<double*>(oldp) != newval)) fullDouble(oldp, newval);
}
#undef CHG
#ifdef VL_TRACE_OFFLOAD
#undef chgBit
#undef chgCData
#undef chgSData
#undef chgIData
#undef chgQData
#undef chgWData
#undef chgDouble
#endif
};
#endif // guard

View File

@ -10,26 +10,26 @@
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//=============================================================================
///
/// \file
/// \brief Verilated common-format tracing implementation code
///
/// This file must be compiled and linked against all Verilated objects
/// that use --trace.
///
/// Use "verilator --trace" to add this to the Makefile for the linker.
///
//
// Verilated tracing implementation code template common to all formats.
// This file is included by the format specific implementations and
// should not be used otherwise.
//
//=============================================================================
// clang-format off
#ifndef VL_CPPCHECK
#ifndef VL_DERIVED_T
#if !defined(VL_SUB_T) || !defined(VL_BUF_T)
# error "This file should be included in trace format implementations"
#endif
#include "verilated_intrinsics.h"
#include "verilated_trace.h"
#ifdef VL_TRACE_PARALLEL
# include "verilated_threads.h"
# include <list>
#endif
#if 0
# include <iostream>
@ -82,7 +82,7 @@ static std::string doubleToTimescale(double value) {
//=========================================================================
// Buffer management
template <> uint32_t* VerilatedTrace<VL_DERIVED_T>::getOffloadBuffer() {
template <> uint32_t* VerilatedTrace<VL_SUB_T, VL_BUF_T>::getOffloadBuffer() {
uint32_t* bufferp;
// Some jitter is expected, so some number of alternative offload buffers are
// required, but don't allocate more than 8 buffers.
@ -101,7 +101,7 @@ template <> uint32_t* VerilatedTrace<VL_DERIVED_T>::getOffloadBuffer() {
return bufferp;
}
template <> void VerilatedTrace<VL_DERIVED_T>::waitForOffloadBuffer(const uint32_t* buffp) {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::waitForOffloadBuffer(const uint32_t* buffp) {
// Slow path code only called on flush/shutdown, so use a simple algorithm.
// Collect buffers from worker and stash them until we get the one we want.
std::deque<uint32_t*> stash;
@ -116,7 +116,7 @@ template <> void VerilatedTrace<VL_DERIVED_T>::waitForOffloadBuffer(const uint32
//=========================================================================
// Worker thread
template <> void VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain() {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::offloadWorkerThreadMain() {
bool shutdown = false;
do {
@ -127,6 +127,8 @@ template <> void VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain() {
const uint32_t* readp = bufferp;
std::unique_ptr<VL_BUF_T> traceBufp; // We own the passed tracebuffer
while (true) {
const uint32_t cmd = readp[0];
const uint32_t top = cmd >> 4;
@ -141,44 +143,44 @@ template <> void VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain() {
// CHG_* commands
case VerilatedTraceOffloadCommand::CHG_BIT_0:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_BIT_0 " << top);
chgBitImpl(oldp, 0);
traceBufp->chgBitImpl(oldp, 0);
continue;
case VerilatedTraceOffloadCommand::CHG_BIT_1:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_BIT_1 " << top);
chgBitImpl(oldp, 1);
traceBufp->chgBitImpl(oldp, 1);
continue;
case VerilatedTraceOffloadCommand::CHG_CDATA:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_CDATA " << top);
// Bits stored in bottom byte of command
chgCDataImpl(oldp, *readp, top);
traceBufp->chgCDataImpl(oldp, *readp, top);
readp += 1;
continue;
case VerilatedTraceOffloadCommand::CHG_SDATA:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_SDATA " << top);
// Bits stored in bottom byte of command
chgSDataImpl(oldp, *readp, top);
traceBufp->chgSDataImpl(oldp, *readp, top);
readp += 1;
continue;
case VerilatedTraceOffloadCommand::CHG_IDATA:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_IDATA " << top);
// Bits stored in bottom byte of command
chgIDataImpl(oldp, *readp, top);
traceBufp->chgIDataImpl(oldp, *readp, top);
readp += 1;
continue;
case VerilatedTraceOffloadCommand::CHG_QDATA:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_QDATA " << top);
// Bits stored in bottom byte of command
chgQDataImpl(oldp, *reinterpret_cast<const QData*>(readp), top);
traceBufp->chgQDataImpl(oldp, *reinterpret_cast<const QData*>(readp), top);
readp += 2;
continue;
case VerilatedTraceOffloadCommand::CHG_WDATA:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_WDATA " << top);
chgWDataImpl(oldp, readp, top);
traceBufp->chgWDataImpl(oldp, readp, top);
readp += VL_WORDS_I(top);
continue;
case VerilatedTraceOffloadCommand::CHG_DOUBLE:
VL_TRACE_OFFLOAD_DEBUG("Command CHG_DOUBLE " << top);
chgDoubleImpl(oldp, *reinterpret_cast<const double*>(readp));
traceBufp->chgDoubleImpl(oldp, *reinterpret_cast<const double*>(readp));
readp += 2;
continue;
@ -191,9 +193,18 @@ template <> void VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain() {
readp += 2;
continue;
case VerilatedTraceOffloadCommand::TRACE_BUFFER:
VL_TRACE_OFFLOAD_DEBUG("Command TRACE_BUFFER " << top);
readp -= 1; // No code in this command, undo increment
traceBufp.reset(*reinterpret_cast<VL_BUF_T* const*>(readp));
readp += 2;
continue;
//===
// Commands ending this buffer
case VerilatedTraceOffloadCommand::END: VL_TRACE_OFFLOAD_DEBUG("Command END"); break;
case VerilatedTraceOffloadCommand::END: //
VL_TRACE_OFFLOAD_DEBUG("Command END");
break;
case VerilatedTraceOffloadCommand::SHUTDOWN:
VL_TRACE_OFFLOAD_DEBUG("Command SHUTDOWN");
shutdown = true;
@ -202,8 +213,7 @@ template <> void VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain() {
//===
// Unknown command
default: { // LCOV_EXCL_START
VL_TRACE_OFFLOAD_DEBUG("Command UNKNOWN");
VL_PRINTF_MT("Trace command: 0x%08x\n", cmd);
VL_TRACE_OFFLOAD_DEBUG("Command UNKNOWN " << cmd);
VL_FATAL_MT(__FILE__, __LINE__, "", "Unknown trace command");
break;
} // LCOV_EXCL_STOP
@ -221,7 +231,7 @@ template <> void VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain() {
} while (VL_LIKELY(!shutdown));
}
template <> void VerilatedTrace<VL_DERIVED_T>::shutdownOffloadWorker() {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::shutdownOffloadWorker() {
// If the worker thread is not running, done..
if (!m_workerThread) return;
@ -241,7 +251,7 @@ template <> void VerilatedTrace<VL_DERIVED_T>::shutdownOffloadWorker() {
//=============================================================================
// Life cycle
template <> void VerilatedTrace<VL_DERIVED_T>::closeBase() {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::closeBase() {
#ifdef VL_TRACE_OFFLOAD
shutdownOffloadWorker();
while (m_numOffloadBuffers) {
@ -251,7 +261,7 @@ template <> void VerilatedTrace<VL_DERIVED_T>::closeBase() {
#endif
}
template <> void VerilatedTrace<VL_DERIVED_T>::flushBase() {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::flushBase() {
#ifdef VL_TRACE_OFFLOAD
// Hand an empty buffer to the worker thread
uint32_t* const bufferp = getOffloadBuffer();
@ -266,46 +276,29 @@ template <> void VerilatedTrace<VL_DERIVED_T>::flushBase() {
//=============================================================================
// Callbacks to run on global events
template <> void VerilatedTrace<VL_DERIVED_T>::onFlush(void* selfp) {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::onFlush(void* selfp) {
// This calls 'flush' on the derived class (which must then get any mutex)
reinterpret_cast<VL_DERIVED_T*>(selfp)->flush();
reinterpret_cast<VL_SUB_T*>(selfp)->flush();
}
template <> void VerilatedTrace<VL_DERIVED_T>::onExit(void* selfp) {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::onExit(void* selfp) {
// This calls 'close' on the derived class (which must then get any mutex)
reinterpret_cast<VL_DERIVED_T*>(selfp)->close();
reinterpret_cast<VL_SUB_T*>(selfp)->close();
}
//=============================================================================
// VerilatedTrace
template <>
VerilatedTrace<VL_DERIVED_T>::VerilatedTrace()
: m_sigs_oldvalp{nullptr}
, m_sigs_enabledp{nullptr}
, m_timeLastDump{0}
, m_fullDump{true}
, m_nextCode{0}
, m_numSignals{0}
, m_maxBits{0}
, m_scopeEscape{'.'}
, m_timeRes{1e-9}
, m_timeUnit {
1e-9
}
#ifdef VL_TRACE_OFFLOAD
, m_numOffloadBuffers { 0 }
#endif
{
template <> VerilatedTrace<VL_SUB_T, VL_BUF_T>::VerilatedTrace() {
set_time_unit(Verilated::threadContextp()->timeunitString());
set_time_resolution(Verilated::threadContextp()->timeprecisionString());
}
template <> VerilatedTrace<VL_DERIVED_T>::~VerilatedTrace() {
template <> VerilatedTrace<VL_SUB_T, VL_BUF_T>::~VerilatedTrace() {
if (m_sigs_oldvalp) VL_DO_CLEAR(delete[] m_sigs_oldvalp, m_sigs_oldvalp = nullptr);
if (m_sigs_enabledp) VL_DO_CLEAR(delete[] m_sigs_enabledp, m_sigs_enabledp = nullptr);
Verilated::removeFlushCb(VerilatedTrace<VL_DERIVED_T>::onFlush, this);
Verilated::removeExitCb(VerilatedTrace<VL_DERIVED_T>::onExit, this);
Verilated::removeFlushCb(VerilatedTrace<VL_SUB_T, VL_BUF_T>::onFlush, this);
Verilated::removeExitCb(VerilatedTrace<VL_SUB_T, VL_BUF_T>::onExit, this);
#ifdef VL_TRACE_OFFLOAD
closeBase();
#endif
@ -314,7 +307,7 @@ template <> VerilatedTrace<VL_DERIVED_T>::~VerilatedTrace() {
//=========================================================================
// Internals available to format specific implementations
template <> void VerilatedTrace<VL_DERIVED_T>::traceInit() VL_MT_UNSAFE {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::traceInit() VL_MT_UNSAFE {
// Note: It is possible to re-open a trace file (VCD in particular),
// so we must reset the next code here, but it must have the same number
// of codes on re-open
@ -359,8 +352,8 @@ template <> void VerilatedTrace<VL_DERIVED_T>::traceInit() VL_MT_UNSAFE {
}
// Set callback so flush/abort will flush this file
Verilated::addFlushCb(VerilatedTrace<VL_DERIVED_T>::onFlush, this);
Verilated::addExitCb(VerilatedTrace<VL_DERIVED_T>::onExit, this);
Verilated::addFlushCb(VerilatedTrace<VL_SUB_T, VL_BUF_T>::onFlush, this);
Verilated::addExitCb(VerilatedTrace<VL_SUB_T, VL_BUF_T>::onExit, this);
#ifdef VL_TRACE_OFFLOAD
// Compute offload buffer size. we need to be able to store a new value for
@ -372,13 +365,13 @@ template <> void VerilatedTrace<VL_DERIVED_T>::traceInit() VL_MT_UNSAFE {
// Start the worker thread
m_workerThread.reset(
new std::thread{&VerilatedTrace<VL_DERIVED_T>::offloadWorkerThreadMain, this});
new std::thread{&VerilatedTrace<VL_SUB_T, VL_BUF_T>::offloadWorkerThreadMain, this});
#endif
}
template <>
bool VerilatedTrace<VL_DERIVED_T>::declCode(uint32_t code, const char* namep, uint32_t bits,
bool tri) {
bool VerilatedTrace<VL_SUB_T, VL_BUF_T>::declCode(uint32_t code, const char* namep, uint32_t bits,
bool tri) {
if (VL_UNCOVERABLE(!code)) {
VL_FATAL_MT(__FILE__, __LINE__, "", "Internal: internal trace problem, code 0 is illegal");
}
@ -422,28 +415,30 @@ bool VerilatedTrace<VL_DERIVED_T>::declCode(uint32_t code, const char* namep, ui
//=========================================================================
// Internals available to format specific implementations
template <> std::string VerilatedTrace<VL_DERIVED_T>::timeResStr() const {
template <> std::string VerilatedTrace<VL_SUB_T, VL_BUF_T>::timeResStr() const {
return doubleToTimescale(m_timeRes);
}
//=========================================================================
// External interface to client code
template <> void VerilatedTrace<VL_DERIVED_T>::set_time_unit(const char* unitp) VL_MT_SAFE {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::set_time_unit(const char* unitp) VL_MT_SAFE {
m_timeUnit = timescaleToDouble(unitp);
}
template <> void VerilatedTrace<VL_DERIVED_T>::set_time_unit(const std::string& unit) VL_MT_SAFE {
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::set_time_unit(const std::string& unit) VL_MT_SAFE {
set_time_unit(unit.c_str());
}
template <> void VerilatedTrace<VL_DERIVED_T>::set_time_resolution(const char* unitp) VL_MT_SAFE {
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::set_time_resolution(const char* unitp) VL_MT_SAFE {
m_timeRes = timescaleToDouble(unitp);
}
template <>
void VerilatedTrace<VL_DERIVED_T>::set_time_resolution(const std::string& unit) VL_MT_SAFE {
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::set_time_resolution(const std::string& unit) VL_MT_SAFE {
set_time_resolution(unit.c_str());
}
template <>
void VerilatedTrace<VL_DERIVED_T>::dumpvars(int level, const std::string& hier) VL_MT_SAFE {
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::dumpvars(int level, const std::string& hier) VL_MT_SAFE {
if (level == 0) {
m_dumpvars.clear(); // empty = everything on
} else {
@ -456,7 +451,87 @@ void VerilatedTrace<VL_DERIVED_T>::dumpvars(int level, const std::string& hier)
}
}
template <> void VerilatedTrace<VL_DERIVED_T>::dump(uint64_t timeui) VL_MT_SAFE_EXCLUDES(m_mutex) {
#ifdef VL_TRACE_PARALLEL
template <> //
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::parallelWorkerTask(void* datap, bool) {
ParallelWorkerData* const wdp = reinterpret_cast<ParallelWorkerData*>(datap);
// Run the task
wdp->m_cb(wdp->m_userp, wdp->m_bufp);
// Mark buffer as ready
const VerilatedLockGuard lock{wdp->m_mutex};
wdp->m_ready.store(true);
if (wdp->m_waiting) wdp->m_cv.notify_one();
}
template <> VL_ATTR_NOINLINE void VerilatedTrace<VL_SUB_T, VL_BUF_T>::ParallelWorkerData::wait() {
// Spin for a while, waiting for the buffer to become ready
for (int i = 0; i < VL_LOCK_SPINS; ++i) {
if (VL_LIKELY(m_ready.load(std::memory_order_relaxed))) return;
VL_CPU_RELAX();
}
// We have been spinning for a while, so yield the thread
VerilatedLockGuard lock{m_mutex};
m_waiting = true;
m_cv.wait(lock, [this] { return m_ready.load(std::memory_order_relaxed); });
m_waiting = false;
}
#endif
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::runParallelCallbacks(const ParallelCallbackMap& cbMap) {
for (VlThreadPool* threadPoolp : m_threadPoolps) {
#ifdef VL_TRACE_PARALLEL
// If tracing in parallel, dispatch to the thread pool (if exists)
if (threadPoolp && threadPoolp->numThreads()) {
// List of work items for thread (std::list, as ParallelWorkerData is not movable)
std::list<ParallelWorkerData> workerData;
// We use the whole pool + the main thread
const unsigned threads = threadPoolp->numThreads() + 1;
// Main thread executes all jobs with index % threads == 0
std::vector<ParallelWorkerData*> mainThreadWorkerData;
// The tracing callbacks to execute on this thread-pool
const auto& cbVec = cbMap.at(threadPoolp);
// Enqueue all the jobs
for (unsigned i = 0; i < cbVec.size(); ++i) {
const CallbackRecord& cbr = cbVec[i];
// Always get the trace buffer on the main thread
Buffer* const bufp = getTraceBuffer();
// Create new work item
workerData.emplace_back(cbr.m_dumpCb, cbr.m_userp, bufp);
// Grab the new work item
ParallelWorkerData* const itemp = &workerData.back();
// Enqueue task to thread pool, or main thread
if (unsigned rem = i % threads) {
threadPoolp->workerp(rem - 1)->addTask(parallelWorkerTask, itemp, false);
} else {
mainThreadWorkerData.push_back(itemp);
}
}
// Execute jobs assigned to the main thread
for (ParallelWorkerData* const itemp : mainThreadWorkerData) {
parallelWorkerTask(itemp, false);
}
// Commit all trace buffers in order
for (ParallelWorkerData& item : workerData) {
// Wait until ready
item.wait();
// Commit the buffer
commitTraceBuffer(item.m_bufp);
}
continue;
}
#endif
// Fall back on sequential execution
for (const CallbackRecord& cbr : cbMap.at(threadPoolp)) {
Buffer* const traceBufferp = getTraceBuffer();
cbr.m_dumpCb(cbr.m_userp, traceBufferp);
commitTraceBuffer(traceBufferp);
}
}
}
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::dump(uint64_t timeui) VL_MT_SAFE_EXCLUDES(m_mutex) {
// Not really VL_MT_SAFE but more VL_MT_UNSAFE_ONE.
// This does get the mutex, but if multiple threads are trying to dump
// chances are the data being dumped will have other problems
@ -504,20 +579,14 @@ template <> void VerilatedTrace<VL_DERIVED_T>::dump(uint64_t timeui) VL_MT_SAFE_
// Run the callbacks
if (VL_UNLIKELY(m_fullDump)) {
m_fullDump = false; // No more need for next dump to be full
for (uint32_t i = 0; i < m_fullCbs.size(); ++i) {
const CallbackRecord& cbr = m_fullCbs[i];
cbr.m_dumpCb(cbr.m_userp, self());
}
runParallelCallbacks(m_fullCbs);
} else {
for (uint32_t i = 0; i < m_chgCbs.size(); ++i) {
const CallbackRecord& cbr = m_chgCbs[i];
cbr.m_dumpCb(cbr.m_userp, self());
}
runParallelCallbacks(m_chgCbs);
}
for (uint32_t i = 0; i < m_cleanupCbs.size(); ++i) {
const CallbackRecord& cbr = m_cleanupCbs[i];
cbr.m_dumpCb(cbr.m_userp, self());
cbr.m_cleanupCb(cbr.m_userp, self());
}
#ifdef VL_TRACE_OFFLOAD
@ -538,8 +607,18 @@ template <> void VerilatedTrace<VL_DERIVED_T>::dump(uint64_t timeui) VL_MT_SAFE_
// Non-hot path internal interface to Verilator generated code
template <>
void VerilatedTrace<VL_DERIVED_T>::addCallbackRecord(std::vector<CallbackRecord>& cbVec,
CallbackRecord& cbRec)
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addThreadPool(VlThreadPool* threadPoolp)
VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
for (VlThreadPool* const poolp : m_threadPoolps) {
if (poolp == threadPoolp) return;
}
m_threadPoolps.push_back(threadPoolp);
}
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addCallbackRecord(std::vector<CallbackRecord>& cbVec,
CallbackRecord& cbRec)
VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
if (VL_UNCOVERABLE(timeLastDump() != 0)) { // LCOV_EXCL_START
@ -550,91 +629,40 @@ void VerilatedTrace<VL_DERIVED_T>::addCallbackRecord(std::vector<CallbackRecord>
cbVec.push_back(cbRec);
}
template <> void VerilatedTrace<VL_DERIVED_T>::addInitCb(initCb_t cb, void* userp) VL_MT_SAFE {
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addInitCb(initCb_t cb, void* userp) VL_MT_SAFE {
CallbackRecord cbr{cb, userp};
addCallbackRecord(m_initCbs, cbr);
}
template <> void VerilatedTrace<VL_DERIVED_T>::addFullCb(dumpCb_t cb, void* userp) VL_MT_SAFE {
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addFullCb(dumpCb_t cb, void* userp,
VlThreadPool* threadPoolp) VL_MT_SAFE {
CallbackRecord cbr{cb, userp};
addCallbackRecord(m_fullCbs, cbr);
addThreadPool(threadPoolp);
addCallbackRecord(m_fullCbs[threadPoolp], cbr);
}
template <> void VerilatedTrace<VL_DERIVED_T>::addChgCb(dumpCb_t cb, void* userp) VL_MT_SAFE {
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addChgCb(dumpCb_t cb, void* userp,
VlThreadPool* threadPoolp) VL_MT_SAFE {
CallbackRecord cbr{cb, userp};
addCallbackRecord(m_chgCbs, cbr);
addThreadPool(threadPoolp);
addCallbackRecord(m_chgCbs[threadPoolp], cbr);
}
template <> void VerilatedTrace<VL_DERIVED_T>::addCleanupCb(dumpCb_t cb, void* userp) VL_MT_SAFE {
template <>
void VerilatedTrace<VL_SUB_T, VL_BUF_T>::addCleanupCb(cleanupCb_t cb, void* userp) VL_MT_SAFE {
CallbackRecord cbr{cb, userp};
addCallbackRecord(m_cleanupCbs, cbr);
}
template <> void VerilatedTrace<VL_DERIVED_T>::pushNamePrefix(const std::string& prefix) {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::pushNamePrefix(const std::string& prefix) {
m_namePrefixStack.push_back(m_namePrefixStack.back() + prefix);
}
template <> void VerilatedTrace<VL_DERIVED_T>::popNamePrefix(unsigned count) {
template <> void VerilatedTrace<VL_SUB_T, VL_BUF_T>::popNamePrefix(unsigned count) {
while (count--) m_namePrefixStack.pop_back();
assert(!m_namePrefixStack.empty());
}
//=========================================================================
// Hot path internal interface to Verilator generated code
// These functions must write the new value back into the old value store,
// and subsequently call the format specific emit* implementations. Note
// that this file must be included in the format specific implementation, so
// the emit* functions can be inlined for performance.
template <> void VerilatedTrace<VL_DERIVED_T>::fullBit(uint32_t* oldp, CData newval) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitBit(code, newval);
}
template <> void VerilatedTrace<VL_DERIVED_T>::fullCData(uint32_t* oldp, CData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitCData(code, newval, bits);
}
template <> void VerilatedTrace<VL_DERIVED_T>::fullSData(uint32_t* oldp, SData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitSData(code, newval, bits);
}
template <> void VerilatedTrace<VL_DERIVED_T>::fullIData(uint32_t* oldp, IData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitIData(code, newval, bits);
}
template <> void VerilatedTrace<VL_DERIVED_T>::fullQData(uint32_t* oldp, QData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*reinterpret_cast<QData*>(oldp) = newval;
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitQData(code, newval, bits);
}
template <>
void VerilatedTrace<VL_DERIVED_T>::fullWData(uint32_t* oldp, const WData* newvalp, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
for (int i = 0; i < VL_WORDS_I(bits); ++i) oldp[i] = newvalp[i];
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitWData(code, newvalp, bits);
}
template <> void VerilatedTrace<VL_DERIVED_T>::fullDouble(uint32_t* oldp, double newval) {
const uint32_t code = oldp - m_sigs_oldvalp;
*reinterpret_cast<double*>(oldp) = newval;
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
// cppcheck-suppress invalidPointerCast
self()->emitDouble(code, newval);
}
//=========================================================================
// Primitives converting binary values to strings...
@ -725,41 +753,86 @@ static inline void cvtQDataToStr(char* dstp, QData value) {
#define cvtEDataToStr cvtIDataToStr
//=============================================================================
//=========================================================================
// VerilatedTraceBuffer
#ifdef VERILATED_VCD_TEST
void verilated_trace_imp_selftest() {
#define SELF_CHECK(got, exp) \
do { \
if ((got) != (exp)) VL_FATAL_MT(__FILE__, __LINE__, "", "%Error: selftest"); \
} while (0)
#define SELF_CHECK_TS(scale) \
SELF_CHECK(doubleToTimescale(timescaleToDouble(scale)), std::string{scale});
SELF_CHECK_TS("100s");
SELF_CHECK_TS("10s");
SELF_CHECK_TS("1s");
SELF_CHECK_TS("100ms");
SELF_CHECK_TS("10ms");
SELF_CHECK_TS("1ms");
SELF_CHECK_TS("100us");
SELF_CHECK_TS("10us");
SELF_CHECK_TS("1us");
SELF_CHECK_TS("100ns");
SELF_CHECK_TS("10ns");
SELF_CHECK_TS("1ns");
SELF_CHECK_TS("100ps");
SELF_CHECK_TS("10ps");
SELF_CHECK_TS("1ps");
SELF_CHECK_TS("100fs");
SELF_CHECK_TS("10fs");
SELF_CHECK_TS("1fs");
SELF_CHECK_TS("100as");
SELF_CHECK_TS("10as");
SELF_CHECK_TS("1as");
template <> //
VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::VerilatedTraceBuffer(VL_SUB_T& owner)
: m_owner{owner} {
#ifdef VL_TRACE_OFFLOAD
if (m_offloadBufferWritep) {
using This = VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>*;
// Tack on the buffer address
static_assert(2 * sizeof(uint32_t) >= sizeof(This),
"This should be enough on all plafrorms");
*m_offloadBufferWritep++ = VerilatedTraceOffloadCommand::TRACE_BUFFER;
*reinterpret_cast<This*>(m_offloadBufferWritep) = this;
m_offloadBufferWritep += 2;
}
#endif
}
#endif
// These functions must write the new value back into the old value store,
// and subsequently call the format specific emit* implementations. Note
// that this file must be included in the format specific implementation, so
// the emit* functions can be inlined for performance.
template <> //
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullBit(uint32_t* oldp, CData newval) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitBit(code, newval);
}
template <>
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullCData(uint32_t* oldp, CData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitCData(code, newval, bits);
}
template <>
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullSData(uint32_t* oldp, SData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitSData(code, newval, bits);
}
template <>
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullIData(uint32_t* oldp, IData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*oldp = newval; // Still copy even if not tracing so chg doesn't call full
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitIData(code, newval, bits);
}
template <>
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullQData(uint32_t* oldp, QData newval, int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
*reinterpret_cast<QData*>(oldp) = newval;
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitQData(code, newval, bits);
}
template <>
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullWData(uint32_t* oldp, const WData* newvalp,
int bits) {
const uint32_t code = oldp - m_sigs_oldvalp;
for (int i = 0; i < VL_WORDS_I(bits); ++i) oldp[i] = newvalp[i];
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
self()->emitWData(code, newvalp, bits);
}
template <>
void VerilatedTraceBuffer<VL_SUB_T, VL_BUF_T>::fullDouble(uint32_t* oldp, double newval) {
const uint32_t code = oldp - m_sigs_oldvalp;
*reinterpret_cast<double*>(oldp) = newval;
if (VL_UNLIKELY(m_sigs_enabledp && !(VL_BITISSET_W(m_sigs_enabledp, code)))) return;
// cppcheck-suppress invalidPointerCast
self()->emitDouble(code, newval);
}
#endif // VL_CPPCHECK

View File

@ -62,12 +62,23 @@ constexpr unsigned VL_TRACE_MAX_VCD_CODE_SIZE = 5; // Maximum length of a VCD s
// cache-lines.
constexpr unsigned VL_TRACE_SUFFIX_ENTRY_SIZE = 8; // Size of a suffix entry
//=============================================================================
// Utility functions: TODO: put these in a common place and share them.
template <size_t N> static size_t roundUpToMultipleOf(size_t value) {
static_assert((N & (N - 1)) == 0, "'N' must be a power of 2");
size_t mask = N - 1;
return (value + mask) & ~mask;
}
//=============================================================================
// Specialization of the generics for this trace format
#define VL_DERIVED_T VerilatedVcd
#include "verilated_trace_imp.cpp"
#undef VL_DERIVED_T
#define VL_SUB_T VerilatedVcd
#define VL_BUF_T VerilatedVcdBuffer
#include "verilated_trace_imp.h"
#undef VL_SUB_T
#undef VL_BUF_T
//=============================================================================
//=============================================================================
@ -183,7 +194,7 @@ void VerilatedVcd::makeNameMap() {
deleteNameMap();
m_namemapp = new NameMap;
VerilatedTrace<VerilatedVcd>::traceInit();
Super::traceInit();
// Though not speced, it's illegal to generate a vcd with signals
// not under any module - it crashes at least two viewers.
@ -218,13 +229,17 @@ VerilatedVcd::~VerilatedVcd() {
if (m_wrBufp) VL_DO_CLEAR(delete[] m_wrBufp, m_wrBufp = nullptr);
deleteNameMap();
if (m_filep && m_fileNewed) VL_DO_CLEAR(delete m_filep, m_filep = nullptr);
#ifdef VL_TRACE_PARALLEL
assert(m_numBuffers == m_freeBuffers.size());
for (auto& pair : m_freeBuffers) VL_DO_CLEAR(delete[] pair.first, pair.first = nullptr);
#endif
}
void VerilatedVcd::closePrev() {
// This function is on the flush() call path
if (!isOpen()) return;
VerilatedTrace<VerilatedVcd>::flushBase();
Super::flushBase();
bufferFlush();
m_isOpen = false;
m_filep->close();
@ -251,14 +266,14 @@ void VerilatedVcd::close() VL_MT_SAFE_EXCLUDES(m_mutex) {
printStr(" $end\n");
}
closePrev();
// closePrev() called VerilatedTrace<VerilatedVcd>::flush(), so we just
// closePrev() called Super::flush(), so we just
// need to shut down the tracing thread here.
VerilatedTrace<VerilatedVcd>::closeBase();
Super::closeBase();
}
void VerilatedVcd::flush() VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
VerilatedTrace<VerilatedVcd>::flushBase();
Super::flushBase();
bufferFlush();
}
@ -277,12 +292,12 @@ void VerilatedVcd::printQuad(uint64_t n) {
printStr(buf);
}
void VerilatedVcd::bufferResize(uint64_t minsize) {
void VerilatedVcd::bufferResize(size_t minsize) {
// minsize is size of largest write. We buffer at least 8 times as much data,
// writing when we are 3/4 full (with thus 2*minsize remaining free)
if (VL_UNLIKELY(minsize > m_wrChunkSize)) {
const char* oldbufp = m_wrBufp;
m_wrChunkSize = minsize * 2;
m_wrChunkSize = roundUpToMultipleOf<1024>(minsize * 2);
m_wrBufp = new char[m_wrChunkSize * 8];
std::memcpy(m_wrBufp, oldbufp, m_writep - oldbufp);
m_writep = m_wrBufp + (m_writep - oldbufp);
@ -463,14 +478,16 @@ void VerilatedVcd::declare(uint32_t code, const char* name, const char* wirep, b
int arraynum, bool tri, bool bussed, int msb, int lsb) {
const int bits = ((msb > lsb) ? (msb - lsb) : (lsb - msb)) + 1;
const bool enabled = VerilatedTrace<VerilatedVcd>::declCode(code, name, bits, tri);
const bool enabled = Super::declCode(code, name, bits, tri);
if (m_suffixes.size() <= nextCode() * VL_TRACE_SUFFIX_ENTRY_SIZE) {
m_suffixes.resize(nextCode() * VL_TRACE_SUFFIX_ENTRY_SIZE * 2, 0);
}
// Make sure write buffer is large enough (one character per bit), plus header
bufferResize(bits + 1024);
// Keep upper bound on bytes a single signal cna emit into the buffer
m_maxSignalBytes = std::max<size_t>(m_maxSignalBytes, bits + 32);
// Make sure write buffer is large enough, plus header
bufferResize(m_maxSignalBytes + 1024);
if (!enabled) return;
@ -562,26 +579,73 @@ void VerilatedVcd::declArray(uint32_t code, const char* name, bool array, int ar
void VerilatedVcd::declDouble(uint32_t code, const char* name, bool array, int arraynum) {
declare(code, name, "real", array, arraynum, false, false, 63, 0);
}
#ifdef VL_TRACE_VCD_OLD_API
void VerilatedVcd::declTriBit(uint32_t code, const char* name, bool array, int arraynum) {
declare(code, name, "wire", array, arraynum, true, false, 0, 0);
}
void VerilatedVcd::declTriBus(uint32_t code, const char* name, bool array, int arraynum, int msb,
int lsb) {
declare(code, name, "wire", array, arraynum, true, true, msb, lsb);
}
void VerilatedVcd::declTriQuad(uint32_t code, const char* name, bool array, int arraynum, int msb,
int lsb) {
declare(code, name, "wire", array, arraynum, true, true, msb, lsb);
}
void VerilatedVcd::declTriArray(uint32_t code, const char* name, bool array, int arraynum, int msb,
int lsb) {
declare(code, name, "wire", array, arraynum, true, true, msb, lsb);
}
#endif // VL_TRACE_VCD_OLD_API
//=============================================================================
// Trace rendering prinitives
// Get/commit trace buffer
VerilatedVcdBuffer* VerilatedVcd::getTraceBuffer() {
#ifdef VL_TRACE_PARALLEL
// Note: This is called from VeriltedVcd::dump, which already holds the lock
// If no buffer available, allocate a new one
if (m_freeBuffers.empty()) {
constexpr size_t pageSize = 4096;
// 4 * m_maxSignalBytes, so we can reserve 2 * m_maxSignalBytes at the end for safety
size_t startingSize = roundUpToMultipleOf<pageSize>(4 * m_maxSignalBytes);
m_freeBuffers.emplace_back(new char[startingSize], startingSize);
++m_numBuffers;
}
// Grab a buffer
const auto pair = m_freeBuffers.back();
m_freeBuffers.pop_back();
// Return the buffer
return new VerilatedVcdBuffer{*this, pair.first, pair.second};
#else
return new VerilatedVcdBuffer{*this};
#endif
}
void VerilatedVcd::commitTraceBuffer(VerilatedVcdBuffer* bufp) {
#ifdef VL_TRACE_PARALLEL
// Note: This is called from VeriltedVcd::dump, which already holds the lock
// Resize output buffer. Note, we use the full size of the trace buffer, as
// this is a lot more stable than the actual occupancy of the trace buffer.
// This helps us to avoid re-allocations due to small size changes.
bufferResize(bufp->m_size);
// Compute occupancy of buffer
const size_t usedSize = bufp->m_writep - bufp->m_bufp;
// Copy to output buffer
std::memcpy(m_writep, bufp->m_bufp, usedSize);
// Adjust write pointer
m_writep += usedSize;
// Flush if necessary
bufferCheck();
// Put buffer back on free list
m_freeBuffers.emplace_back(bufp->m_bufp, bufp->m_size);
#else
// Needs adjusting for emitTimeChange
m_writep = bufp->m_writep;
#endif
delete bufp;
}
//=============================================================================
// VerilatedVcdBuffer implementation
#ifdef VL_TRACE_PARALLEL
VerilatedVcdBuffer::VerilatedVcdBuffer(VerilatedVcd& owner, char* bufp, size_t size)
: VerilatedTraceBuffer<VerilatedVcd, VerilatedVcdBuffer>{owner}
, m_writep{bufp}
, m_bufp{bufp}
, m_size{size} {
adjustGrowp();
}
#else
VerilatedVcdBuffer::VerilatedVcdBuffer(VerilatedVcd& owner)
: VerilatedTraceBuffer<VerilatedVcd, VerilatedVcdBuffer>{owner} {}
#endif
//=============================================================================
// Trace rendering primitives
static inline void
VerilatedVcdCCopyAndAppendNewLine(char* writep, const char* suffixp) VL_ATTR_NO_SANITIZE_ALIGN;
@ -606,26 +670,55 @@ static inline void VerilatedVcdCCopyAndAppendNewLine(char* writep, const char* s
#endif
}
void VerilatedVcd::finishLine(uint32_t code, char* writep) {
const char* const suffixp = m_suffixes.data() + code * VL_TRACE_SUFFIX_ENTRY_SIZE;
void VerilatedVcdBuffer::finishLine(uint32_t code, char* writep) {
const char* const suffixp = m_suffixes + code * VL_TRACE_SUFFIX_ENTRY_SIZE;
VL_DEBUG_IFDEF(assert(suffixp[0]););
VerilatedVcdCCopyAndAppendNewLine(writep, suffixp);
// Now write back the write pointer incremented by the actual size of the
// suffix, which was stored in the last byte of the suffix buffer entry.
m_writep = writep + suffixp[VL_TRACE_SUFFIX_ENTRY_SIZE - 1];
bufferCheck();
#ifdef VL_TRACE_PARALLEL
// Double the size of the buffer if necessary
if (VL_UNLIKELY(m_writep >= m_growp)) {
// Compute occupied size of current buffer
const size_t usedSize = m_writep - m_bufp;
// We are always doubling the size
m_size *= 2;
// Allocate the new buffer
char* const newBufp = new char[m_size];
// Copy from current buffer to new buffer
std::memcpy(newBufp, m_bufp, usedSize);
// Delete current buffer
delete[] m_bufp;
// Make new buffer the current buffer
m_bufp = newBufp;
// Adjust write pointer
m_writep = m_bufp + usedSize;
// Adjust resize limit
adjustGrowp();
}
#else
// Flush the write buffer if there's not enough space left for new information
// We only call this once per vector, so we need enough slop for a very wide "b###" line
if (VL_UNLIKELY(m_writep > m_wrFlushp)) {
m_owner.m_writep = m_writep;
m_owner.bufferFlush();
m_writep = m_owner.m_writep;
}
#endif
}
//=============================================================================
// emit* trace routines
// Note: emit* are only ever called from one place (full* in
// verilated_trace_imp.cpp, which is included in this file at the top),
// verilated_trace_imp.h, which is included in this file at the top),
// so always inline them.
VL_ATTR_ALWINLINE
void VerilatedVcd::emitBit(uint32_t code, CData newval) {
void VerilatedVcdBuffer::emitBit(uint32_t code, CData newval) {
// Don't prefetch suffix as it's a bit too late;
char* wp = m_writep;
*wp++ = '0' | static_cast<char>(newval);
@ -633,7 +726,7 @@ void VerilatedVcd::emitBit(uint32_t code, CData newval) {
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitCData(uint32_t code, CData newval, int bits) {
void VerilatedVcdBuffer::emitCData(uint32_t code, CData newval, int bits) {
char* wp = m_writep;
*wp++ = 'b';
cvtCDataToStr(wp, newval << (VL_BYTESIZE - bits));
@ -641,7 +734,7 @@ void VerilatedVcd::emitCData(uint32_t code, CData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitSData(uint32_t code, SData newval, int bits) {
void VerilatedVcdBuffer::emitSData(uint32_t code, SData newval, int bits) {
char* wp = m_writep;
*wp++ = 'b';
cvtSDataToStr(wp, newval << (VL_SHORTSIZE - bits));
@ -649,7 +742,7 @@ void VerilatedVcd::emitSData(uint32_t code, SData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitIData(uint32_t code, IData newval, int bits) {
void VerilatedVcdBuffer::emitIData(uint32_t code, IData newval, int bits) {
char* wp = m_writep;
*wp++ = 'b';
cvtIDataToStr(wp, newval << (VL_IDATASIZE - bits));
@ -657,7 +750,7 @@ void VerilatedVcd::emitIData(uint32_t code, IData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitQData(uint32_t code, QData newval, int bits) {
void VerilatedVcdBuffer::emitQData(uint32_t code, QData newval, int bits) {
char* wp = m_writep;
*wp++ = 'b';
cvtQDataToStr(wp, newval << (VL_QUADSIZE - bits));
@ -665,7 +758,7 @@ void VerilatedVcd::emitQData(uint32_t code, QData newval, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitWData(uint32_t code, const WData* newvalp, int bits) {
void VerilatedVcdBuffer::emitWData(uint32_t code, const WData* newvalp, int bits) {
int words = VL_WORDS_I(bits);
char* wp = m_writep;
*wp++ = 'b';
@ -682,272 +775,10 @@ void VerilatedVcd::emitWData(uint32_t code, const WData* newvalp, int bits) {
}
VL_ATTR_ALWINLINE
void VerilatedVcd::emitDouble(uint32_t code, double newval) {
void VerilatedVcdBuffer::emitDouble(uint32_t code, double newval) {
char* wp = m_writep;
// Buffer can't overflow before VL_SNPRINTF; we sized during declaration
VL_SNPRINTF(wp, m_wrChunkSize, "r%.16g", newval);
VL_SNPRINTF(wp, m_maxSignalBytes, "r%.16g", newval);
wp += std::strlen(wp);
finishLine(code, wp);
}
#ifdef VL_TRACE_VCD_OLD_API
void VerilatedVcd::fullBit(uint32_t code, const uint32_t newval) {
// Note the &1, so we don't require clean input -- makes more common no change case faster
*oldp(code) = newval;
*m_writep++ = ('0' + static_cast<char>(newval & 1));
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullBus(uint32_t code, const uint32_t newval, int bits) {
*oldp(code) = newval;
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
*m_writep++ = ((newval & (1L << bit)) ? '1' : '0');
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullQuad(uint32_t code, const uint64_t newval, int bits) {
(*(reinterpret_cast<uint64_t*>(oldp(code)))) = newval;
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
*m_writep++ = ((newval & (1ULL << bit)) ? '1' : '0');
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullArray(uint32_t code, const uint32_t* newval, int bits) {
for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) { oldp(code)[word] = newval[word]; }
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
*m_writep++ = ((newval[(bit / 32)] & (1L << (bit & 0x1f))) ? '1' : '0');
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullArray(uint32_t code, const uint64_t* newval, int bits) {
for (int word = 0; word < (((bits - 1) / 64) + 1); ++word) { oldp(code)[word] = newval[word]; }
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
*m_writep++ = ((newval[(bit / 64)] & (1ULL << (bit & 0x3f))) ? '1' : '0');
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullTriBit(uint32_t code, const uint32_t newval, const uint32_t newtri) {
oldp(code)[0] = newval;
oldp(code)[1] = newtri;
*m_writep++ = "01zz"[newval | (newtri << 1)];
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullTriBus(uint32_t code, const uint32_t newval, const uint32_t newtri,
int bits) {
oldp(code)[0] = newval;
oldp(code)[1] = newtri;
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
*m_writep++ = "01zz"[((newval >> bit) & 1) | (((newtri >> bit) & 1) << 1)];
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullTriQuad(uint32_t code, const uint64_t newval, const uint64_t newtri,
int bits) {
(*(reinterpret_cast<uint64_t*>(oldp(code)))) = newval;
(*(reinterpret_cast<uint64_t*>(oldp(code + 1)))) = newtri;
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
*m_writep++ = "01zz"[((newval >> bit) & 1ULL) | (((newtri >> bit) & 1ULL) << 1ULL)];
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullTriArray(uint32_t code, const uint32_t* newvalp, const uint32_t* newtrip,
int bits) {
for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) {
oldp(code)[word * 2] = newvalp[word];
oldp(code)[word * 2 + 1] = newtrip[word];
}
*m_writep++ = 'b';
for (int bit = bits - 1; bit >= 0; --bit) {
uint32_t valbit = (newvalp[(bit / 32)] >> (bit & 0x1f)) & 1;
uint32_t tribit = (newtrip[(bit / 32)] >> (bit & 0x1f)) & 1;
*m_writep++ = "01zz"[valbit | (tribit << 1)];
}
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
void VerilatedVcd::fullDouble(uint32_t code, const double newval) {
// cppcheck-suppress invalidPointerCast
(*(reinterpret_cast<double*>(oldp(code)))) = newval;
// Buffer can't overflow before VL_SNPRINTF; we sized during declaration
VL_SNPRINTF(m_writep, m_wrChunkSize, "r%.16g", newval);
m_writep += std::strlen(m_writep);
*m_writep++ = ' ';
m_writep = writeCode(m_writep, code);
*m_writep++ = '\n';
bufferCheck();
}
#endif // VL_TRACE_VCD_OLD_API
//======================================================================
//======================================================================
//======================================================================
#ifdef VERILATED_VCD_TEST
#include <iostream>
extern void verilated_trace_imp_selftest();
uint32_t v1, v2, s1, s2[3];
uint32_t tri96[3];
uint32_t tri96__tri[3];
uint64_t quad96[2];
uint64_t tquad;
uint64_t tquad__tri;
uint8_t ch;
uint64_t timestamp = 1;
double doub = 0.0;
float flo = 0.0f;
void vcdInit(void*, VerilatedVcd* vcdp, uint32_t) {
vcdp->scopeEscape('.');
vcdp->pushNamePrefix("top.");
/**/ vcdp->declBus(0x2, "v1", -1, 0, 5, 1);
/**/ vcdp->declBus(0x3, "v2", -1, 0, 6, 1);
/**/ vcdp->pushNamePrefix("sub1.");
/***/ vcdp->declBit(0x4, "s1", -1, 0);
/***/ vcdp->declBit(0x5, "ch", -1, 0);
/**/ vcdp->popNamePrefix();
/**/ vcdp->pushNamePrefix("sub2.");
/***/ vcdp->declArray(0x6, "s2", -1, 0, 40, 3);
/**/ vcdp->popNamePrefix();
vcdp->popNamePrefix();
// Note need to add 3 for next code.
vcdp->pushNamePrefix("top2.");
/**/ vcdp->declBus(0x2, "t2v1", -1, 0, 4, 1);
/**/ vcdp->declTriBit(0x10, "io1", -1, 0);
/**/ vcdp->declTriBus(0x12, "io5", -1, 0, 4, 0);
/**/ vcdp->declTriArray(0x16, "io96", -1, 0, 95, 0);
/**/ // Note need to add 6 for next code.
/**/ vcdp->declDouble(0x1c, "doub", -1, 0);
/**/ // Note need to add 2 for next code.
/**/ vcdp->declArray(0x20, "q2", -1, 0, 95, 0);
/**/ // Note need to add 4 for next code.
/**/ vcdp->declTriQuad(0x24, "tq", -1, 0, 63, 0);
/**/ // Note need to add 4 for next code.
vcdp->popNamePrefix();
}
void vcdFull(void*, VerilatedVcd* vcdp) {
vcdp->fullBus(0x2, v1, 5);
vcdp->fullBus(0x3, v2, 7);
vcdp->fullBit(0x4, s1);
vcdp->fullBus(0x5, ch, 2);
vcdp->fullArray(0x6, &s2[0], 38);
vcdp->fullTriBit(0x10, tri96[0] & 1, tri96__tri[0] & 1);
vcdp->fullTriBus(0x12, tri96[0] & 0x1f, tri96__tri[0] & 0x1f, 5);
vcdp->fullTriArray(0x16, tri96, tri96__tri, 96);
vcdp->fullDouble(0x1c, doub);
vcdp->fullArray(0x20, &quad96[0], 96);
vcdp->fullTriQuad(0x24, tquad, tquad__tri, 64);
}
void vcdChange(void*, VerilatedVcd* vcdp) {
vcdp->chgBus(0x2, v1, 5);
vcdp->chgBus(0x3, v2, 7);
vcdp->chgBit(0x4, s1);
vcdp->chgBus(0x5, ch, 2);
vcdp->chgArray(0x6, &s2[0], 38);
vcdp->chgTriBit(0x10, tri96[0] & 1, tri96__tri[0] & 1);
vcdp->chgTriBus(0x12, tri96[0] & 0x1f, tri96__tri[0] & 0x1f, 5);
vcdp->chgTriArray(0x16, tri96, tri96__tri, 96);
vcdp->chgDouble(0x1c, doub);
vcdp->chgArray(0x20, &quad96[0], 96);
vcdp->chgTriQuad(0x24, tquad, tquad__tri, 64);
}
// clang-format off
void vcdTestMain(const char* filenamep) {
verilated_trace_imp_selftest();
v1 = v2 = s1 = 0;
s2[0] = s2[1] = s2[2] = 0;
tri96[2] = tri96[1] = tri96[0] = 0;
tri96__tri[2] = tri96__tri[1] = tri96__tri[0] = ~0;
quad96[1] = quad96[0] = 0;
ch = 0;
doub = 0;
tquad = tquad__tri = 0;
{
VerilatedVcdC* vcdp = new VerilatedVcdC;
vcdp->evcd(true);
vcdp->set_time_unit("1ms");
vcdp->set_time_unit(std::string{"1ms"});
vcdp->set_time_resolution("1ns");
vcdp->set_time_resolution(std::string{"1ns"});
vcdp->spTrace()->addInitCb(&vcdInit, 0);
vcdp->spTrace()->addFullCb(&vcdFull, 0);
vcdp->spTrace()->addChgCb(&vcdChange, 0);
vcdp->open(filenamep);
// Dumping
vcdp->dump(++timestamp);
v1 = 0xfff;
tri96[2] = 4; tri96[1] = 2; tri96[0] = 1;
tri96__tri[2] = tri96__tri[1] = tri96__tri[0] = ~0; // Still tri
quad96[1] = 0xffffffff; quad96[0] = 0;
doub = 1.5;
flo = 1.4f;
vcdp->dump(++timestamp);
v2 = 0x1;
s2[1] = 2;
tri96__tri[2] = tri96__tri[1] = tri96__tri[0] = 0; // enable w/o data change
quad96[1] = 0; quad96[0] = ~0;
doub = -1.66e13;
flo = 0.123f;
tquad = 0x00ff00ff00ff00ffULL;
tquad__tri = 0x0000fffff0000ffffULL;
vcdp->dump(++timestamp);
ch = 2;
tri96[2] = ~4; tri96[1] = ~2; tri96[0] = ~1;
doub = -3.33e-13;
vcdp->dump(++timestamp);
vcdp->dump(++timestamp);
# ifdef VERILATED_VCD_TEST_64BIT
const uint64_t bytesPerDump = 15ULL;
for (uint64_t i = 0; i < ((1ULL << 32) / bytesPerDump); i++) {
v1 = i;
vcdp->dump(++timestamp);
}
# endif
vcdp->close();
VL_DO_CLEAR(delete vcdp, vcdp = nullptr);
}
}
#endif
// clang-format on
//********************************************************************
// ;compile-command: "v4make test_regress/t/t_trace_c_api.pl"
//
// Local Variables:
// End:

View File

@ -28,39 +28,20 @@
#include <string>
#include <vector>
class VerilatedVcd;
//=============================================================================
// VerilatedFile
/// Class representing a file to write to. These virtual methods can be
/// overrode for e.g. socket I/O.
class VerilatedVcdFile VL_NOT_FINAL {
private:
int m_fd = 0; // File descriptor we're writing to
public:
// METHODS
/// Construct a (as yet) closed file
VerilatedVcdFile() = default;
/// Close and destruct
virtual ~VerilatedVcdFile() = default;
/// Open a file with given filename
virtual bool open(const std::string& name) VL_MT_UNSAFE;
/// Close object's file
virtual void close() VL_MT_UNSAFE;
/// Write data to file (if it is open)
virtual ssize_t write(const char* bufp, ssize_t len) VL_MT_UNSAFE;
};
class VerilatedVcdBuffer;
class VerilatedVcdFile;
//=============================================================================
// VerilatedVcd
// Base class to create a Verilator VCD dump
// This is an internally used class - see VerilatedVcdC for what to call from applications
class VerilatedVcd VL_NOT_FINAL : public VerilatedTrace<VerilatedVcd> {
class VerilatedVcd VL_NOT_FINAL : public VerilatedTrace<VerilatedVcd, VerilatedVcdBuffer> {
public:
using Super = VerilatedTrace<VerilatedVcd, VerilatedVcdBuffer>;
private:
// Give the superclass access to private bits (to avoid virtual functions)
friend class VerilatedTrace<VerilatedVcd>;
friend Buffer; // Give the buffer access to the private bits
//=========================================================================
// VCD specific internals
@ -74,9 +55,10 @@ private:
int m_modDepth = 0; // Depth of module hierarchy
char* m_wrBufp; // Output buffer
const char* m_wrFlushp; // Output buffer flush trigger location
char* m_wrFlushp; // Output buffer flush trigger location
char* m_writep; // Write pointer into output buffer
uint64_t m_wrChunkSize; // Output buffer size
size_t m_wrChunkSize; // Output buffer size
size_t m_maxSignalBytes = 0; // Upper bound on number of bytes a single signal can generate
uint64_t m_wroteBytes = 0; // Number of bytes written to this file
std::vector<char> m_suffixes; // VCD line end string codes + metadata
@ -84,7 +66,13 @@ private:
using NameMap = std::map<const std::string, const std::string>;
NameMap* m_namemapp = nullptr; // List of names for the header
void bufferResize(uint64_t minsize);
#ifdef VL_TRACE_PARALLEL
// Vector of free trace buffers as (pointer, size) pairs.
std::vector<std::pair<char*, size_t>> m_freeBuffers;
size_t m_numBuffers = 0; // Number of trace buffers allocated
#endif
void bufferResize(size_t minsize);
void bufferFlush() VL_MT_UNSAFE_ONE;
inline void bufferCheck() {
// Flush the write buffer if there's not enough space left for new information
@ -107,8 +95,6 @@ private:
static char* writeCode(char* writep, uint32_t code);
void finishLine(uint32_t code, char* writep);
// CONSTRUCTORS
VL_UNCOPYABLE(VerilatedVcd);
@ -116,27 +102,22 @@ protected:
//=========================================================================
// Implementation of VerilatedTrace interface
// Implementations of protected virtual methods for VerilatedTrace
// Called when the trace moves forward to a new time point
virtual void emitTimeChange(uint64_t timeui) override;
// Hooks called from VerilatedTrace
virtual bool preFullDump() override { return isOpen(); }
virtual bool preChangeDump() override;
// Implementations of duck-typed methods for VerilatedTrace. These are
// called from only one place (namely full*) so always inline them.
inline void emitBit(uint32_t code, CData newval);
inline void emitCData(uint32_t code, CData newval, int bits);
inline void emitSData(uint32_t code, SData newval, int bits);
inline void emitIData(uint32_t code, IData newval, int bits);
inline void emitQData(uint32_t code, QData newval, int bits);
inline void emitWData(uint32_t code, const WData* newvalp, int bits);
inline void emitDouble(uint32_t code, double newval);
// Trace buffer management
virtual VerilatedVcdBuffer* getTraceBuffer() override;
virtual void commitTraceBuffer(VerilatedVcdBuffer*) override;
public:
//=========================================================================
// External interface to client code
// CONSTRUCTOR
explicit VerilatedVcd(VerilatedVcdFile* filep = nullptr);
~VerilatedVcd();
@ -144,7 +125,7 @@ public:
// Set size in megabytes after which new file should be created
void rolloverMB(uint64_t rolloverMB) { m_rolloverMB = rolloverMB; }
// METHODS
// METHODS - All must be thread safe
// Open the file; call isOpen() to see if errors
void open(const char* filename) VL_MT_SAFE_EXCLUDES(m_mutex);
// Open next data-only file
@ -164,168 +145,95 @@ public:
void declQuad(uint32_t code, const char* name, bool array, int arraynum, int msb, int lsb);
void declArray(uint32_t code, const char* name, bool array, int arraynum, int msb, int lsb);
void declDouble(uint32_t code, const char* name, bool array, int arraynum);
#ifdef VL_TRACE_VCD_OLD_API
//=========================================================================
// Note: These are only for testing for backward compatibility with foreign
// code and is not used by Verilator. Do not use these as there is no
// guarantee of functionality.
void declTriBit(uint32_t code, const char* name, bool array, int arraynum);
void declTriBus(uint32_t code, const char* name, bool array, int arraynum, int msb, int lsb);
void declTriQuad(uint32_t code, const char* name, bool array, int arraynum, int msb, int lsb);
void declTriArray(uint32_t code, const char* name, bool array, int arraynum, int msb, int lsb);
void fullBit(uint32_t* oldp, CData newval) { fullBit(oldp - this->oldp(0), newval); }
void fullCData(uint32_t* oldp, CData newval, int bits) {
fullBus(oldp - this->oldp(0), newval, bits);
}
void fullSData(uint32_t* oldp, SData newval, int bits) {
fullBus(oldp - this->oldp(0), newval, bits);
}
void fullIData(uint32_t* oldp, IData newval, int bits) {
fullBus(oldp - this->oldp(0), newval, bits);
}
void fullQData(uint32_t* oldp, QData newval, int bits) {
fullQuad(oldp - this->oldp(0), newval, bits);
}
void fullWData(uint32_t* oldp, const WData* newvalp, int bits) {
fullArray(oldp - this->oldp(0), newvalp, bits);
}
void fullDouble(uint32_t* oldp, double newval) { fullDouble(oldp - this->oldp(0), newval); }
inline void chgBit(uint32_t* oldp, CData newval) { chgBit(oldp - this->oldp(0), newval); }
inline void chgCData(uint32_t* oldp, CData newval, int bits) {
chgBus(oldp - this->oldp(0), newval, bits);
}
inline void chgSData(uint32_t* oldp, SData newval, int bits) {
chgBus(oldp - this->oldp(0), newval, bits);
}
inline void chgIData(uint32_t* oldp, IData newval, int bits) {
chgBus(oldp - this->oldp(0), newval, bits);
}
inline void chgQData(uint32_t* oldp, QData newval, int bits) {
chgQuad(oldp - this->oldp(0), newval, bits);
}
inline void chgWData(uint32_t* oldp, const WData* newvalp, int bits) {
chgArray(oldp - this->oldp(0), newvalp, bits);
}
inline void chgDouble(uint32_t* oldp, double newval) {
chgDouble(oldp - this->oldp(0), newval);
}
// Inside dumping routines, dump one signal, faster when not inlined
// due to code size reduction.
void fullBit(uint32_t code, const uint32_t newval);
void fullBus(uint32_t code, const uint32_t newval, int bits);
void fullQuad(uint32_t code, const uint64_t newval, int bits);
void fullArray(uint32_t code, const uint32_t* newvalp, int bits);
void fullArray(uint32_t code, const uint64_t* newvalp, int bits);
void fullTriBit(uint32_t code, const uint32_t newval, const uint32_t newtri);
void fullTriBus(uint32_t code, const uint32_t newval, const uint32_t newtri, int bits);
void fullTriQuad(uint32_t code, const uint64_t newval, const uint64_t newtri, int bits);
void fullTriArray(uint32_t code, const uint32_t* newvalp, const uint32_t* newtrip, int bits);
void fullDouble(uint32_t code, const double newval);
// Inside dumping routines, dump one signal if it has changed.
// We do want to inline these to avoid calls when the value did not change.
// Dump a single bit signal, but only if its value changed since the last dump.
// Inlined so the common no-change case costs only a load and compare.
inline void chgBit(uint32_t code, const uint32_t newval) {
    if (VL_UNLIKELY(oldp(code)[0] != newval)) fullBit(code, newval);
}
// Dump a bus (<= 32 bit) signal if any bit within 'bits' changed.
// Inlined so the common no-change case stays cheap.
inline void chgBus(uint32_t code, const uint32_t newval, int bits) {
    // XOR of old and new: any set bit marks a changed bit position
    const uint32_t changed = oldp(code)[0] ^ newval;
    // Only dump when a bit inside the signal width actually differs
    if (VL_UNLIKELY(changed)
        && VL_UNLIKELY(bits == 32 || (changed & ((1U << bits) - 1)))) {
        fullBus(code, newval, bits);
    }
}
// Dump a quad (<= 64 bit) signal if any bit within 'bits' changed.
inline void chgQuad(uint32_t code, const uint64_t newval, int bits) {
    // Old value is stored as two 32-bit entries, read here as one 64-bit word
    const uint64_t oldval = *(reinterpret_cast<uint64_t*>(oldp(code)));
    const uint64_t changed = oldval ^ newval;
    if (VL_UNLIKELY(changed)
        && VL_UNLIKELY(bits == 64 || (changed & ((1ULL << bits) - 1)))) {
        fullQuad(code, newval, bits);
    }
}
// Dump a wide signal given as 32-bit words, if any word changed.
inline void chgArray(uint32_t code, const uint32_t* newvalp, int bits) {
    // Number of 32-bit words covering 'bits' (same rounding as original formula)
    const int words = ((bits - 1) / 32) + 1;
    for (int i = 0; i < words; ++i) {
        if (VL_UNLIKELY(oldp(code)[i] != newvalp[i])) {
            fullArray(code, newvalp, bits);
            return;  // fullArray refreshes all words; no need to keep scanning
        }
    }
}
// Dump a wide signal given as 64-bit words, if any word changed.
// The old-value store is addressed in 32-bit entries, so the code index
// advances by 2 per 64-bit word (see oldp(code + 2 * word) below).
inline void chgArray(uint32_t code, const uint64_t* newvalp, int bits) {
    for (int word = 0; word < (((bits - 1) / 64) + 1); ++word) {
        if (VL_UNLIKELY(*(reinterpret_cast<uint64_t*>(oldp(code + 2 * word)))
                        ^ newvalp[word])) {
            // Any differing word triggers a full dump of the whole array
            fullArray(code, newvalp, bits);
            return;
        }
    }
}
// Dump a tristate bit if it changed. The old state holds the value word at
// oldp(code)[0] and the tristate word at oldp(code)[1].
inline void chgTriBit(uint32_t code, const uint32_t newval, const uint32_t newtri) {
    const uint32_t diff = ((oldp(code)[0] ^ newval) | (oldp(code)[1] ^ newtri));
    if (VL_UNLIKELY(diff)) {
        // Verilator 3.510 and newer provide clean input, so the below
        // is only for back compatibility
        if (VL_UNLIKELY(diff & 1)) { // Change after clean?
            fullTriBit(code, newval, newtri);
        }
    }
}
// Dump a tristate bus (<= 32 bit) if any bit within 'bits' of either the
// value word (oldp(code)[0]) or tristate word (oldp(code)[1]) changed.
inline void chgTriBus(uint32_t code, const uint32_t newval, const uint32_t newtri, int bits) {
    const uint32_t diff = ((oldp(code)[0] ^ newval) | (oldp(code)[1] ^ newtri));
    if (VL_UNLIKELY(diff)) {
        // Mask to the signal width so changes in padding bits are ignored
        if (VL_UNLIKELY(bits == 32 || (diff & ((1U << bits) - 1)))) {
            fullTriBus(code, newval, newtri, bits);
        }
    }
}
// Dump a tristate quad (<= 64 bit) if any bit within 'bits' changed.
inline void chgTriQuad(uint32_t code, const uint64_t newval, const uint64_t newtri, int bits) {
    // NOTE(review): the two 64-bit reads below (at oldp(code) and oldp(code + 1))
    // overlap by one 32-bit entry; presumably this matches fullTriQuad's
    // storage layout for legacy tristate quads - confirm against fullTriQuad
    const uint64_t diff = (((*(reinterpret_cast<uint64_t*>(oldp(code)))) ^ newval)
                           | ((*(reinterpret_cast<uint64_t*>(oldp(code + 1)))) ^ newtri));
    if (VL_UNLIKELY(diff)) {
        // Mask to the signal width so changes in padding bits are ignored
        if (VL_UNLIKELY(bits == 64 || (diff & ((1ULL << bits) - 1)))) {
            fullTriQuad(code, newval, newtri, bits);
        }
    }
}
// Dump a wide tristate signal if any word changed. Old state interleaves
// value and tristate words: value at oldp(code)[word * 2], tristate at
// oldp(code)[word * 2 + 1].
inline void chgTriArray(uint32_t code, const uint32_t* newvalp, const uint32_t* newtrip,
                        int bits) {
    for (int word = 0; word < (((bits - 1) / 32) + 1); ++word) {
        if (VL_UNLIKELY((oldp(code)[word * 2] ^ newvalp[word])
                        | (oldp(code)[word * 2 + 1] ^ newtrip[word]))) {
            // Any differing word triggers a full dump of the whole array
            fullTriArray(code, newvalp, newtrip, bits);
            return;
        }
    }
}
// Dump a real (double) signal if its value changed since the last dump.
inline void chgDouble(uint32_t code, const double newval) {
    // Old value entries are 32-bit; reinterpret the pair as a double
    // cppcheck-suppress invalidPointerCast
    const double oldval = *(reinterpret_cast<double*>(oldp(code)));
    if (VL_UNLIKELY(oldval != newval)) fullDouble(code, newval);
}
// METHODS
// Old/standalone API only
void evcd(bool flag) { m_evcd = flag; }
#endif // VL_TRACE_VCD_OLD_API
};
#ifndef DOXYGEN
// Declare specializations here they are used in VerilatedVcdC just below
template <> void VerilatedTrace<VerilatedVcd>::dump(uint64_t timeui);
template <> void VerilatedTrace<VerilatedVcd>::set_time_unit(const char* unitp);
template <> void VerilatedTrace<VerilatedVcd>::set_time_unit(const std::string& unit);
template <> void VerilatedTrace<VerilatedVcd>::set_time_resolution(const char* unitp);
template <> void VerilatedTrace<VerilatedVcd>::set_time_resolution(const std::string& unit);
template <> void VerilatedTrace<VerilatedVcd>::dumpvars(int level, const std::string& hier);
// Declare specialization here as it's used in VerilatedFstC just below
template <> void VerilatedVcd::Super::dump(uint64_t time);
template <> void VerilatedVcd::Super::set_time_unit(const char* unitp);
template <> void VerilatedVcd::Super::set_time_unit(const std::string& unit);
template <> void VerilatedVcd::Super::set_time_resolution(const char* unitp);
template <> void VerilatedVcd::Super::set_time_resolution(const std::string& unit);
template <> void VerilatedVcd::Super::dumpvars(int level, const std::string& hier);
#endif // DOXYGEN
//=============================================================================
// VerilatedVcdBuffer
// Trace buffer for a VerilatedVcd trace file. Under VL_TRACE_PARALLEL each
// buffer records into its own memory region which is later committed to the
// file; otherwise it writes straight into the owner's output buffer.
class VerilatedVcdBuffer final : public VerilatedTraceBuffer<VerilatedVcd, VerilatedVcdBuffer> {
    // Give the trace file access to the private bits
    friend VerilatedVcd;
    friend VerilatedVcd::Super;

#ifdef VL_TRACE_PARALLEL
    // Parallel mode: standalone buffer owned by this instance
    char* m_writep;  // Write pointer into m_bufp
    char* m_bufp;  // The beginning of the trace buffer
    size_t m_size;  // The size of the buffer at m_bufp
    char* m_growp;  // Resize limit pointer
#else
    // Serial mode: aliases of the owner trace file's output buffer state
    char* m_writep = m_owner.m_writep;  // Write pointer into output buffer
    char* const m_wrFlushp = m_owner.m_wrFlushp;  // Output buffer flush trigger location
#endif
    // VCD line end string codes + metadata
    const char* const m_suffixes = m_owner.m_suffixes.data();
    // The maximum number of bytes a single signal can emit
    const size_t m_maxSignalBytes = m_owner.m_maxSignalBytes;

    // Finish the VCD line for 'code' at 'writep' (defined out of line)
    void finishLine(uint32_t code, char* writep);

#ifdef VL_TRACE_PARALLEL
    // Recompute the resize limit so at least two maximum-size signal
    // emissions fit between m_growp and the end of the buffer
    void adjustGrowp() {
        m_growp = (m_bufp + m_size) - (2 * m_maxSignalBytes);
        assert(m_growp >= m_bufp + m_maxSignalBytes);
    }
#endif

public:
    // CONSTRUCTOR
#ifdef VL_TRACE_PARALLEL
    explicit VerilatedVcdBuffer(VerilatedVcd& owner, char* bufp, size_t size);
#else
    explicit VerilatedVcdBuffer(VerilatedVcd& owner);
#endif
    ~VerilatedVcdBuffer() = default;

    //=========================================================================
    // Implementation of VerilatedTraceBuffer interface
    // Implementations of duck-typed methods for VerilatedTraceBuffer. These are
    // called from only one place (the full* methods), so always inline them.
    VL_ATTR_ALWINLINE inline void emitBit(uint32_t code, CData newval);
    VL_ATTR_ALWINLINE inline void emitCData(uint32_t code, CData newval, int bits);
    VL_ATTR_ALWINLINE inline void emitSData(uint32_t code, SData newval, int bits);
    VL_ATTR_ALWINLINE inline void emitIData(uint32_t code, IData newval, int bits);
    VL_ATTR_ALWINLINE inline void emitQData(uint32_t code, QData newval, int bits);
    VL_ATTR_ALWINLINE inline void emitWData(uint32_t code, const WData* newvalp, int bits);
    VL_ATTR_ALWINLINE inline void emitDouble(uint32_t code, double newval);
};
//=============================================================================
// VerilatedFile
/// Class representing a file to write to. These virtual methods can be
/// overridden for e.g. socket I/O.
class VerilatedVcdFile VL_NOT_FINAL {
private:
    int m_fd = 0;  // File descriptor we're writing to
public:
    // METHODS
    /// Construct a (as yet) closed file
    VerilatedVcdFile() = default;
    /// Close and destruct
    virtual ~VerilatedVcdFile() = default;
    /// Open a file with given filename; returns true on success
    virtual bool open(const std::string& name) VL_MT_UNSAFE;
    /// Close object's file
    virtual void close() VL_MT_UNSAFE;
    /// Write data to file (if it is open)
    /// NOTE(review): presumably returns bytes written like POSIX write() - confirm
    virtual ssize_t write(const char* bufp, ssize_t len) VL_MT_UNSAFE;
};
//=============================================================================
// VerilatedVcdC
/// Class representing a VCD dump file in C standalone (no SystemC)
@ -396,16 +304,6 @@ public:
// Internal class access
inline VerilatedVcd* spTrace() { return &m_sptrace; }
#ifdef VL_TRACE_VCD_OLD_API
//=========================================================================
// Note: These are only for testing for backward compatibility with foreign
// code and is not used by Verilator. Do not use these as there is no
// guarantee of functionality.
// Use evcd format
void evcd(bool flag) VL_MT_UNSAFE_ONE { m_sptrace.evcd(flag); }
#endif
};
#endif // guard

View File

@ -40,6 +40,7 @@
#ifdef __GNUC__
# define VL_ATTR_ALIGNED(alignment) __attribute__((aligned(alignment)))
# define VL_ATTR_ALWINLINE __attribute__((always_inline))
# define VL_ATTR_NOINLINE __attribute__((noinline))
# define VL_ATTR_COLD __attribute__((cold))
# define VL_ATTR_HOT __attribute__((hot))
# define VL_ATTR_NORETURN __attribute__((noreturn))
@ -82,6 +83,9 @@
#ifndef VL_ATTR_ALWINLINE
# define VL_ATTR_ALWINLINE ///< Attribute to inline, even when not optimizing
#endif
#ifndef VL_ATTR_NOINLINE
# define VL_ATTR_NOINLINE ///< Attribute to never inline, even when optimizing
#endif
#ifndef VL_ATTR_COLD
# define VL_ATTR_COLD ///< Attribute that function is rarely executed
#endif

View File

@ -8533,6 +8533,7 @@ public:
AstNodeDType* childDTypep() const { return VN_AS(op1p(), NodeDType); }
void childDTypep(AstNodeDType* nodep) { setOp1p(nodep); }
AstNode* itemsp() const { return op2p(); } // op2 = AstPatReplicate, AstPatMember, etc
void addItemsp(AstNode* nodep) { addOp2p(nodep); }
};
class AstPatMember final : public AstNodeMath {
// Verilog '{a} or '{a{b}}

View File

@ -106,7 +106,7 @@ public:
}
// Get a reference to the user data
T_Data& operator()(const T_Node* nodep) {
T_Data& operator()(const T_Node* nodep) const {
T_Data* const userp = getUserp(nodep);
UASSERT_OBJ(userp, nodep, "Missing User data on const AstNode");
return *userp;

View File

@ -496,7 +496,7 @@ private:
V3Case::caseLint(nodep);
iterateChildren(nodep);
if (debug() >= 9) nodep->dumpTree(cout, " case_old: ");
if (isCaseTreeFast(nodep) && v3Global.opt.oCase()) {
if (isCaseTreeFast(nodep) && v3Global.opt.fCase()) {
// It's a simple priority encoder or complete statement
// we can make a tree of statements to avoid extra comparisons
++m_statCaseFast;

View File

@ -111,6 +111,15 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
BitPolarityEntry() = default;
};
struct FrozenNodeInfo final { // Context when a frozen node is found
bool m_polarity;
int m_lsb;
bool operator<(const FrozenNodeInfo& other) const {
if (m_lsb != other.m_lsb) return m_lsb < other.m_lsb;
return m_polarity < other.m_polarity;
}
};
class Restorer final { // Restore the original state unless disableRestore() is called
ConstBitOpTreeVisitor& m_visitor;
const size_t m_polaritiesSize;
@ -299,7 +308,8 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
LeafInfo* m_leafp = nullptr; // AstConst or AstVarRef that currently looking for
const AstNode* const m_rootp; // Root of this AST subtree
std::vector<AstNode*> m_frozenNodes; // Nodes that cannot be optimized
std::vector<std::pair<AstNode*, FrozenNodeInfo>>
m_frozenNodes; // Nodes that cannot be optimized
std::vector<BitPolarityEntry> m_bitPolarities; // Polarity of bits found during iterate()
std::vector<std::unique_ptr<VarInfo>> m_varInfos; // VarInfo for each variable, [0] is nullptr
@ -487,7 +497,7 @@ class ConstBitOpTreeVisitor final : public VNVisitor {
restorer.restoreNow();
// Reach past a cast then add to frozen nodes to be added to final reduction
if (const AstCCast* const castp = VN_CAST(opp, CCast)) opp = castp->lhsp();
m_frozenNodes.push_back(opp);
m_frozenNodes.emplace_back(opp, FrozenNodeInfo{m_polarity, m_lsb});
m_failed = origFailed;
continue;
}
@ -652,17 +662,21 @@ public:
}
}
std::map<FrozenNodeInfo, std::vector<AstNode*>> frozenNodes; // Group by FrozenNodeInfo
// Check if frozen terms are clean or not
for (AstNode* const termp : visitor.m_frozenNodes) {
for (const auto& frozenInfo : visitor.m_frozenNodes) {
AstNode* const termp = frozenInfo.first;
// Comparison operators are clean
if (VN_IS(termp, Eq) || VN_IS(termp, Neq) || VN_IS(termp, Lt) || VN_IS(termp, Lte)
|| VN_IS(termp, Gt) || VN_IS(termp, Gte)) {
if ((VN_IS(termp, Eq) || VN_IS(termp, Neq) || VN_IS(termp, Lt) || VN_IS(termp, Lte)
|| VN_IS(termp, Gt) || VN_IS(termp, Gte))
&& frozenInfo.second.m_lsb == 0) {
hasCleanTerm = true;
} else {
// Otherwise, conservatively assume the frozen term is dirty
hasDirtyTerm = true;
UINFO(9, "Dirty frozen term: " << termp << endl);
}
frozenNodes[frozenInfo.second].push_back(termp);
}
// Figure out if a final negation is required
@ -672,7 +686,12 @@ public:
const bool needsCleaning = visitor.isAndTree() ? !hasCleanTerm : hasDirtyTerm;
// Add size of reduction tree to op count
resultOps += termps.size() + visitor.m_frozenNodes.size() - 1;
resultOps += termps.size() - 1;
for (const auto& lsbAndNodes : frozenNodes) {
if (lsbAndNodes.first.m_lsb > 0) ++resultOps; // Needs AstShiftR
if (!lsbAndNodes.first.m_polarity) ++resultOps; // Needs AstNot
resultOps += lsbAndNodes.second.size();
}
// Add final polarity flip in Xor tree
if (needsFlip) ++resultOps;
// Add final cleaning AND
@ -681,7 +700,10 @@ public:
if (debug() >= 9) { // LCOV_EXCL_START
cout << "Bitop tree considered: " << endl;
for (AstNode* const termp : termps) termp->dumpTree("Reduced term: ");
for (AstNode* const termp : visitor.m_frozenNodes) termp->dumpTree("Frozen term: ");
for (const std::pair<AstNode*, FrozenNodeInfo>& termp : visitor.m_frozenNodes)
termp.first->dumpTree("Frozen term with lsb " + std::to_string(termp.second.m_lsb)
+ " polarity " + std::to_string(termp.second.m_polarity)
+ ": ");
cout << "Needs flipping: " << needsFlip << endl;
cout << "Needs cleaning: " << needsCleaning << endl;
cout << "Size: " << resultOps << " input size: " << visitor.m_ops << endl;
@ -724,8 +746,25 @@ public:
resultp = reduce(resultp, termp);
}
// Add any frozen terms to the reduction
for (AstNode* const frozenp : visitor.m_frozenNodes) {
resultp = reduce(resultp, frozenp->unlinkFrBack());
for (auto&& nodes : frozenNodes) {
// nodes.second has same lsb and polarity
AstNode* termp = nullptr;
for (AstNode* const itemp : nodes.second) {
termp = reduce(termp, itemp->unlinkFrBack());
}
if (nodes.first.m_lsb > 0) { // LSB is not 0, so shiftR
AstNodeDType* const dtypep = termp->dtypep();
termp = new AstShiftR{termp->fileline(), termp,
new AstConst(termp->fileline(), AstConst::WidthedValue{},
termp->width(), nodes.first.m_lsb)};
termp->dtypep(dtypep);
}
if (!nodes.first.m_polarity) { // Polarity is inverted, so append Not
AstNodeDType* const dtypep = termp->dtypep();
termp = new AstNot{termp->fileline(), termp};
termp->dtypep(dtypep);
}
resultp = reduce(resultp, termp);
}
// Set width of masks to expected result width. This is required to prevent later removal
@ -1051,7 +1090,7 @@ private:
bool matchBitOpTree(AstNode* nodep) {
if (nodep->widthMin() != 1) return false;
if (!v3Global.opt.oConstBitOpTree()) return false;
if (!v3Global.opt.fConstBitOpTree()) return false;
string debugPrefix;
if (debug() >= 9) { // LCOV_EXCL_START
@ -1373,7 +1412,7 @@ private:
return (VN_IS(nodep, And) || VN_IS(nodep, Or) || VN_IS(nodep, Xor));
}
bool ifAdjacentSel(const AstSel* lhsp, const AstSel* rhsp) {
if (!v3Global.opt.oAssemble()) return false; // opt disabled
if (!v3Global.opt.fAssemble()) return false; // opt disabled
if (!lhsp || !rhsp) return false;
const AstNode* const lfromp = lhsp->fromp();
const AstNode* const rfromp = rhsp->fromp();
@ -1388,7 +1427,7 @@ private:
}
bool ifMergeAdjacent(AstNode* lhsp, AstNode* rhsp) {
// called by concatmergeable to determine if {lhsp, rhsp} make sense
if (!v3Global.opt.oAssemble()) return false; // opt disabled
if (!v3Global.opt.fAssemble()) return false; // opt disabled
// two same varref
if (operandsSame(lhsp, rhsp)) return true;
const AstSel* lselp = VN_CAST(lhsp, Sel);
@ -1425,7 +1464,7 @@ private:
}
bool concatMergeable(const AstNode* lhsp, const AstNode* rhsp, unsigned depth) {
// determine if {a OP b, c OP d} => {a, c} OP {b, d} is advantageous
if (!v3Global.opt.oAssemble()) return false; // opt disabled
if (!v3Global.opt.fAssemble()) return false; // opt disabled
if (lhsp->type() != rhsp->type()) return false;
if (!ifConcatMergeableBiop(lhsp)) return false;
if (depth > CONCAT_MERGABLE_MAX_DEPTH) return false; // As worse case O(n^2) algorithm
@ -2511,7 +2550,7 @@ private:
if (nodep->access().isReadOnly()
&& ((!m_params // Can reduce constant wires into equations
&& m_doNConst
&& v3Global.opt.oConst()
&& v3Global.opt.fConst()
// Default value, not a "known" constant for this usage
&& !nodep->varp()->isClassMember()
&& !(nodep->varp()->isFuncLocal() && nodep->varp()->isNonOutput())

View File

@ -752,26 +752,26 @@ class EmitCTrace final : EmitCFunc {
const string func = nodep->full() ? "full" : "chg";
bool emitWidth = true;
if (nodep->dtypep()->basicp()->isDouble()) {
puts("tracep->" + func + "Double");
puts("bufp->" + func + "Double");
emitWidth = false;
} else if (nodep->isWide() || emitTraceIsScBv(nodep) || emitTraceIsScBigUint(nodep)) {
puts("tracep->" + func + "WData");
puts("bufp->" + func + "WData");
} else if (nodep->isQuad()) {
puts("tracep->" + func + "QData");
puts("bufp->" + func + "QData");
} else if (nodep->declp()->widthMin() > 16) {
puts("tracep->" + func + "IData");
puts("bufp->" + func + "IData");
} else if (nodep->declp()->widthMin() > 8) {
puts("tracep->" + func + "SData");
puts("bufp->" + func + "SData");
} else if (nodep->declp()->widthMin() > 1) {
puts("tracep->" + func + "CData");
puts("bufp->" + func + "CData");
} else {
puts("tracep->" + func + "Bit");
puts("bufp->" + func + "Bit");
emitWidth = false;
}
const uint32_t offset = (arrayindex < 0) ? 0 : (arrayindex * nodep->declp()->widthWords());
const uint32_t code = nodep->declp()->code() + offset;
puts(v3Global.opt.useTraceOffloadThread() && !nodep->full() ? "(base+" : "(oldp+");
puts(v3Global.opt.useTraceOffload() && !nodep->full() ? "(base+" : "(oldp+");
puts(cvtToStr(code - nodep->baseCode()));
puts(",");
emitTraceValue(nodep, arrayindex);

View File

@ -113,9 +113,8 @@ class CMakeEmitter final {
cmake_set_raw(*of, name + "_COVERAGE", v3Global.opt.coverage() ? "1" : "0");
*of << "# Threaded output mode? 0/1/N threads (from --threads)\n";
cmake_set_raw(*of, name + "_THREADS", cvtToStr(v3Global.opt.threads()));
*of << "# Threaded tracing output mode? 0/1/N threads (from --trace-threads)\n";
cmake_set_raw(*of, name + "_TRACE_THREADS",
cvtToStr(v3Global.opt.useTraceOffloadThread()));
*of << "# Threaded tracing output mode? 0/1/N threads (from --threads/--trace-threads)\n";
cmake_set_raw(*of, name + "_TRACE_THREADS", cvtToStr(v3Global.opt.vmTraceThreads()));
cmake_set_raw(*of, name + "_TRACE_FST_WRITER_THREAD",
v3Global.opt.traceThreads() && v3Global.opt.traceFormat().fst() ? "1" : "0");
*of << "# Struct output mode? 0/1 (from --trace-structs)\n";

View File

@ -73,9 +73,10 @@ public:
of.puts("VM_TRACE_FST = ");
of.puts(v3Global.opt.trace() && v3Global.opt.traceFormat().fst() ? "1" : "0");
of.puts("\n");
of.puts("# Tracing threaded output mode? 0/1/N threads (from --trace-thread)\n");
of.puts(
"# Tracing threaded output mode? 0/1/N threads (from --threads/--trace-thread)\n");
of.puts("VM_TRACE_THREADS = ");
of.puts(cvtToStr(v3Global.opt.useTraceOffloadThread()));
of.puts(cvtToStr(v3Global.opt.vmTraceThreads()));
of.puts("\n");
of.puts("# Separate FST writer thread? 0/1 (from --trace-fst with --trace-thread > 0)\n");
of.puts("VM_TRACE_FST_WRITER_THREAD = ");

View File

@ -397,11 +397,11 @@ private:
// Then propagate more complicated equations
optimizeSignals(true);
// Remove redundant logic
if (v3Global.opt.oDedupe()) {
if (v3Global.opt.fDedupe()) {
dedupe();
if (debug() >= 6) m_graph.dumpDotFilePrefixed("gate_dedup");
}
if (v3Global.opt.oAssemble()) {
if (v3Global.opt.fAssemble()) {
mergeAssigns();
if (debug() >= 6) m_graph.dumpDotFilePrefixed("gate_assm");
}

View File

@ -254,7 +254,7 @@ void GraphAcyc::simplify(bool allowCut) {
if (allowCut) {
// The main algorithm works without these, though slower
// So if changing the main algorithm, comment these out for a test run
if (v3Global.opt.oAcycSimp()) {
if (v3Global.opt.fAcycSimp()) {
cutBasic(vertexp);
cutBackward(vertexp);
}

View File

@ -42,6 +42,34 @@
//
// Also merges consecutive AstNodeIf statements with the same condition.
//
// Because this optimization has notable performance impact, we go further
// and perform code motion to try to move mergeable conditionals next to each
// other, which in turn enable us to merge more conditionals. To do this, we
// perform an analysis pass, followed by an optimization pass on the whole
// AstCFunc we are optimizing.
//
// The analysis pass gathers, for each statement in the tree, the information
// relevant for determining whether two statements can be swapped, and some
// other additional information that is useful during optimization.
//
// The optimization pass tries to move conditionals near each other, first by
// trying to move a conditional node backwards in the list, so it becomes the
// direct successor of another earlier conditional with the same condition.
// If this is not possible due to variable interference, then we additionally
// try to pull earlier conditionals with the same condition closer forward to
// be the immediate predecessor of the conditional node. We limit maximum
// distance a node can travel to an empirically chosen but otherwise arbitrary
// constant. This limits worst case complexity to be O(n) rather than O(n^2).
// The worst case complexity manifests when N/2 conditionals, all with unique
// conditions are succeeded by N/2 conditionals with the same unique
// conditions, such that each unique condition is used by exactly 2
// conditionals. In this case all N/2 such nodes need to travel approx N/2 distance.
// Limiting the distance bounds the latter, hence limiting complexity.
//
// Once the analysis and optimization passes have been applied to the whole
// function, any merged conditionals will then undergo the same analysis,
// optimization, and merging again in their individual branches.
//
//*************************************************************************
#include "config_build.h"
@ -51,71 +79,364 @@
#include "V3MergeCond.h"
#include "V3Stats.h"
#include "V3Ast.h"
#include "V3AstUserAllocator.h"
#include "V3Hasher.h"
#include "V3DupFinder.h"
#include <queue>
#include <set>
namespace {
//######################################################################
// Utilities
enum class Mergeable {
YES, // Tree can be merged
NO_COND_ASSIGN, // Tree cannot be merged because it contains an assignment to a condition
NO_IMPURE // Tree cannot be merged because it contains an impure node
// This function extracts the Cond node from the RHS of an assignment,
// if there is one and it is in a supported position, which are:
// - RHS is the Cond
// - RHS is And(Const, Cond). This And is inserted often by V3Clean.
// Extract the Cond node from the RHS of an assignment, if there is one and
// it is in a supported position, which are:
// - RHS is the Cond itself
// - RHS is And(Const, Cond) - this And is inserted often by V3Clean
AstNodeCond* extractCondFromRhs(AstNode* rhsp) {
    // Direct conditional on the RHS
    if (AstNodeCond* const directp = VN_CAST(rhsp, NodeCond)) return directp;
    // Conditional masked by a constant And
    if (const AstAnd* const andp = VN_CAST(rhsp, And)) {
        if (VN_IS(andp->lhsp(), Const)) return VN_CAST(andp->rhsp(), NodeCond);
    }
    return nullptr;
}
// Predicate to check if two sets are disjoint. This is stable, as we only need
// to determine if the sets contain a shared element, which is a boolean
// property. It is also efficient as we use sorted sets, and therefore can
// enumerate elements in order (what the ordering is, is unimportant), meaning
// the worst case complexity is O(size of smaller set).
// Predicate to check if two sets are disjoint, i.e.: whether they share any
// element. The result is a pure function of the two sets. We enumerate the
// smaller set and probe the larger one, so the work done is proportional to
// the smaller set (times a logarithmic lookup factor).
bool areDisjoint(const std::set<const AstVar*>& a, const std::set<const AstVar*>& b) {
    if (a.empty() || b.empty()) return true;
    const bool aIsSmaller = a.size() < b.size();
    const std::set<const AstVar*>& probe = aIsSmaller ? a : b;
    const std::set<const AstVar*>& table = aIsSmaller ? b : a;
    for (const AstVar* const varp : probe) {
        if (table.count(varp)) return false;  // Shared element found
    }
    return true;
}
//######################################################################
// Structure containing information required for code motion/merging
// Per-statement analysis results used to decide whether statements can be
// swapped or merged during code motion.
struct StmtProperties {
    AstNode* m_condp = nullptr;  // The condition expression, if a conditional node
    std::set<const AstVar*> m_rdVars;  // Variables read by this statement
    std::set<const AstVar*> m_wrVars;  // Variables written by this statement
    bool m_isFence = false;  // Nothing should move across this statement, nor should it be merged
    AstNodeStmt* m_prevWithSameCondp = nullptr;  // Previous node in same list, with same condition
    // True if this statement writes any variable marked (user1) as appearing
    // in a condition expression.
    bool writesConditionVar() const {
        // This relies on MarkVarsVisitor having been called on the condition node
        for (const AstVar* const varp : m_wrVars) {
            if (varp->user1()) return true;
        }
        return false;
    }
};
class CheckMergeableVisitor final : public VNVisitor {
private:
// STATE
bool m_condAssign = false; // Does this tree contain an assignment to a condition variable?
bool m_impure = false; // Does this tree contain an impure node?
// We store the statement properties in user3 via AstUser3Allocator
using StmtPropertiesAllocator = AstUser3Allocator<AstNodeStmt, StmtProperties>;
// METHODS
VL_DEBUG_FUNC; // Declare debug()
//######################################################################
// Code motion analysis and implementation
// VISITORS
virtual void visit(AstNode* nodep) override {
if (m_impure) return;
// Clear if node is impure
if (!nodep->isPure()) {
UINFO(9, "Not mergeable due to impure node" << nodep << endl);
m_impure = true;
return;
// Pure analysis visitor that build the StmtProperties for each statement in the given
// AstNode list (following AstNode::nextp())
class CodeMotionAnalysisVisitor final : public VNVisitor {
// NODE STATE
// AstNodeStmt::user3 -> StmtProperties (accessed via m_stmtProperties, managed externally,
// see MergeCondVisitor::process)
// AstNode::user4 -> Used by V3Hasher
// AstNode::user5 -> AstNode*: Set on a condition node, points to the last conditional
// with that condition so far encountered in the same AstNode list
VNUser5InUse m_user5InUse;
StmtPropertiesAllocator& m_stmtProperties;
// MEMBERS
V3Hasher m_hasher; // Used by V3DupFinder
// Stack of a V3DupFinder used for finding identical condition expressions within one
// statement list.
std::vector<V3DupFinder> m_stack;
StmtProperties* m_propsp = nullptr; // StmtProperties structure of current AstNodeStmt
// Extract condition expression from a mergeable conditional statement, if any
// Extract the condition expression from a mergeable conditional statement,
// if any: the 'condp' of a conditional assignment RHS, or of an AstNodeIf.
static AstNode* extractCondition(const AstNodeStmt* nodep) {
    AstNode* resultp = nullptr;
    if (const AstNodeIf* const ifp = VN_CAST(nodep, NodeIf)) {
        resultp = ifp->condp();
    } else if (const AstNodeAssign* const assignp = VN_CAST(nodep, NodeAssign)) {
        if (AstNodeCond* const condNodep = extractCondFromRhs(assignp->rhsp())) {
            resultp = condNodep->condp();
        }
    }
    // Strip any casts wrapping the condition expression
    while (AstCCast* const castp = VN_CAST(resultp, CCast)) resultp = castp->lhsp();
    return resultp;
}
// Analyze one statement: record its condition (if conditional), link it to
// an earlier statement in the same list with an identical condition (via
// V3DupFinder), then analyze its subtree and propagate read/write/fence
// properties to the enclosing statement, if any.
void analyzeStmt(AstNodeStmt* nodep, bool tryCondMatch) {
    VL_RESTORER(m_propsp);
    // Keep hold of props of enclosing statement
    StmtProperties* const outerPropsp = m_propsp;
    // Grab the props of this statement
    m_propsp = &m_stmtProperties(nodep);
    // Extract condition from statement
    if (AstNode* const condp = extractCondition(nodep)) {
        // Remember condition node. We always need this as it is used in the later
        // traversal.
        m_propsp->m_condp = condp;
        // If this is a conditional statement, try to find an earlier one with the same
        // condition in the same list (unless we have been told not to bother because we know
        // this node is in a singleton list).
        if (tryCondMatch) {
            // Grab the duplicate finder of this list
            V3DupFinder& dupFinder = m_stack.back();
            // Find a duplicate condition
            const V3DupFinder::iterator& dit = dupFinder.findDuplicate(condp);
            if (dit == dupFinder.end()) {
                // First time seeing this condition in the current list
                dupFinder.insert(condp);
                // Remember last statement with this condition (which is this statement)
                // (stored in user5 of the condition node itself)
                condp->user5p(nodep);
            } else {
                // Seen a conditional with the same condition earlier in the current list
                AstNode* const firstp = dit->second;
                // Add to properties for easy retrieval during optimization
                m_propsp->m_prevWithSameCondp = static_cast<AstNodeStmt*>(firstp->user5p());
                // Remember last statement with this condition (which is this statement)
                firstp->user5p(nodep);
            }
        }
    }
    // Analyse this statement
    analyzeNode(nodep);
    // If there is an enclosing statement, propagate properties upwards
    if (outerPropsp) {
        // Add all rd/wr vars to outer statement
        outerPropsp->m_rdVars.insert(m_propsp->m_rdVars.cbegin(), m_propsp->m_rdVars.cend());
        outerPropsp->m_wrVars.insert(m_propsp->m_wrVars.cbegin(), m_propsp->m_wrVars.cend());
        // If this statement is a fence, the enclosing statement is also a fence
        if (m_propsp->m_isFence) outerPropsp->m_isFence = true;
    }
}
// Record the referenced variable in the read and/or write sets of the
// current statement, based on the reference's access direction.
void analyzeVarRef(AstVarRef* nodep) {
    AstVar* const varp = nodep->varp();
    const VAccess access = nodep->access();
    if (access.isWriteOrRW()) m_propsp->m_wrVars.insert(varp);
    if (access.isReadOrRW()) m_propsp->m_rdVars.insert(varp);
}
// Generic analysis step: an impure node anywhere under a statement marks
// that statement as a fence, then recurse into children.
void analyzeNode(AstNode* nodep) {
    if (!nodep->isPure() && m_propsp) m_propsp->m_isFence = true;
    iterateChildrenConst(nodep);
}
virtual void visit(AstVarRef* nodep) override {
if (m_impure || m_condAssign) return;
// Clear if it's an LValue referencing a marked variable
if (nodep->access().isWriteOrRW() && nodep->varp()->user1()) {
UINFO(9, "Not mergeable due assignment to condition" << nodep << endl);
m_condAssign = true;
// VISITORS
// Generic visit: maintain one V3DupFinder per statement list (for condition
// matching), and dispatch nodes to the appropriate analysis routine.
void visit(AstNode* nodep) override {
    // Push a new stack entry at the start of a list, but only if the list is not a
    // single element (this saves a lot of allocations in expressions)
    bool singletonListStart = false;
    if (nodep->backp()->nextp() != nodep) { // If at head of list
        singletonListStart = nodep->nextp() == nullptr;
        if (!singletonListStart) m_stack.emplace_back(m_hasher);
    }

    // Analyse node
    if (AstNodeStmt* const stmtp = VN_CAST(nodep, NodeStmt)) {
        // Condition matching is pointless in a singleton list - skip it there
        analyzeStmt(stmtp, /*tryCondMatch:*/ !singletonListStart);
    } else if (AstVarRef* const vrefp = VN_CAST(nodep, VarRef)) {
        analyzeVarRef(vrefp);
    } else {
        analyzeNode(nodep);
    }

    // Pop the stack at the end of a list
    if (!singletonListStart && !nodep->nextp()) m_stack.pop_back();
}
// CONSTRUCTOR
CodeMotionAnalysisVisitor(AstNode* nodep, StmtPropertiesAllocator& stmtProperties)
: m_stmtProperties(stmtProperties) {
iterateAndNextConstNull(nodep);
}
public:
CheckMergeableVisitor() = default;
// Return false if this node should not be merged at all because:
// - It contains an impure expression
// - It contains an LValue referencing the condition
Mergeable operator()(const AstNode* node) {
m_condAssign = false;
m_impure = false;
iterateChildrenConst(const_cast<AstNode*>(node));
if (m_impure) { // Impure is stronger than cond assign
return Mergeable::NO_IMPURE;
} else if (m_condAssign) {
return Mergeable::NO_COND_ASSIGN;
} else {
return Mergeable::YES;
}
// Analyse the statement list starting at nodep, filling in stmtProperties.
static void analyze(AstNode* nodep, StmtPropertiesAllocator& stmtProperties) {
CodeMotionAnalysisVisitor{nodep, stmtProperties};
}
};
class CodeMotionOptimizeVisitor final : public VNVisitor {
    // Do not move a node more than this many statements.
    // This bounds complexity at O(N), rather than O(N^2).
    static constexpr unsigned MAX_DISTANCE = 500;

    // NODE STATE
    // AstNodeStmt::user3 -> StmtProperties (accessed via m_stmtProperties, managed externally,
    //                       see MergeCondVisitor::process)
    // AstNodeStmt::user4 -> bool: Already processed this node
    VNUser4InUse m_user4InUse;

    const StmtPropertiesAllocator& m_stmtProperties;  // Properties from prior analysis pass

    // MEMBERS
    // Predicate that checks if the order of two adjacent statements can be swapped
    // without changing program semantics
    bool areSwappable(const AstNodeStmt* ap, const AstNodeStmt* bp) const {
        const StmtProperties& aProps = m_stmtProperties(ap);
        const StmtProperties& bProps = m_stmtProperties(bp);
        // Don't move across fences (statements containing impure nodes)
        if (aProps.m_isFence) return false;
        if (bProps.m_isFence) return false;
        // If either statement writes a variable that the other reads, they are not swappable
        if (!areDisjoint(aProps.m_rdVars, bProps.m_wrVars)) return false;
        if (!areDisjoint(bProps.m_rdVars, aProps.m_wrVars)) return false;
        // If they both write to the same variable, they are not swappable
        if (!areDisjoint(aProps.m_wrVars, bProps.m_wrVars)) return false;
        // Otherwise good to go
        return true;
    }

    // VISITORS
    void visit(AstNodeStmt* nodep) override {
        // Process only on first encounter
        if (nodep->user4SetOnce()) return;
        // First re-order children
        iterateChildren(nodep);
        // Grab hold of previous node with same condition
        AstNodeStmt* prevp = m_stmtProperties(nodep).m_prevWithSameCondp;
        // If no previous node with same condition, we are done
        if (!prevp) return;
#ifdef VL_DEBUG
        {  // Sanity check, only in debug build, otherwise expensive
            const AstNode* currp = prevp;
            while (currp && currp != nodep) currp = currp->nextp();
            // Report 'prevp': 'currp' is always nullptr when this assertion fires
            UASSERT_OBJ(currp, nodep, "Predecessor not in same list as " << prevp);
        }
#endif
        // Otherwise try to move this node backwards, as close as we can to the previous node
        // with the same condition
        if (AstNodeStmt* predp = VN_CAST(nodep->backp(), NodeStmt)) {
            // 'predp' is the newly computed predecessor node of 'nodep', which is initially
            // (without movement) the 'backp' of the node.
            for (unsigned i = MAX_DISTANCE; i; --i) {
                // If the predecessor is the previous node with the same condition, job done
                if (predp == prevp) break;
                // Don't move past a non-statement (e.g.: AstVar), or end of list
                AstNodeStmt* const backp = VN_CAST(predp->backp(), NodeStmt);
                if (!backp) break;
                // Don't swap statements if doing so would change program semantics
                if (!areSwappable(predp, nodep)) break;
                // Otherwise move 'nodep' back
                predp = backp;
            }
            // If we decided that 'nodep' should be moved back
            if (nodep->backp() != predp) {
                // Move the current node to directly follow the computed predecessor
                nodep->unlinkFrBack();
                predp->addNextHere(nodep);
                // If the predecessor is the previous node with the same condition, job done
                if (predp == prevp) return;
            }
        }
        // If we reach here, it means we were unable to move the current node all the way back
        // such that it immediately follows the previous statement with the same condition. Now
        // try to move all previous statements with the same condition forward, in the hope of
        // compacting the list further.
        for (AstNodeStmt* currp = nodep; prevp;
             currp = prevp, prevp = m_stmtProperties(currp).m_prevWithSameCondp) {
            // Move prevp (previous statement with same condition) towards currp
            if (AstNodeStmt* succp = VN_CAST(prevp->nextp(), NodeStmt)) {
                // 'succp' is the newly computed successor node of 'prevp', which is initially
                // (without movement) the 'nextp' of the node.
                // Same iteration bound as the backwards pass above, for consistency.
                for (unsigned i = MAX_DISTANCE; i; --i) {
                    // If the successor of the previous statement with same condition is the
                    // target node, we are done with this predecessor
                    if (succp == currp) break;
                    // Don't move past a non-statement (e.g.: AstVar), or end of list
                    AstNodeStmt* const nextp = VN_CAST(succp->nextp(), NodeStmt);
                    if (!nextp) break;
                    // Don't swap statements if doing so would change program semantics
                    if (!areSwappable(prevp, succp)) break;
                    // Otherwise move further forward
                    succp = nextp;
                }
                // If we decided that 'prevp' should be moved forward
                if (prevp->nextp() != succp) {
                    // Move the current node to directly before the computed successor
                    prevp->unlinkFrBack();
                    succp->addHereThisAsNext(prevp);
                }
            }
        }
    }
    void visit(AstNode* nodep) override {}  // Ignore all non-statements

    // CONSTRUCTOR
    CodeMotionOptimizeVisitor(AstNode* nodep, const StmtPropertiesAllocator& stmtProperties)
        : m_stmtProperties(stmtProperties) {
        // We assert the given node is at the head of the list otherwise we might move a node
        // before the given node. This is easy to fix in the above iteration with a check on a
        // boundary node we should not move past, if we ever need to do so.
        // Note: we will do iterateAndNextNull which requires nodep->backp() != nullptr anyway
        UASSERT_OBJ(nodep->backp()->nextp() != nodep, nodep, "Must be at head of list");
        // Optimize the list
        iterateAndNextNull(nodep);
    }

public:
    // Given an AstNode list (held via AstNode::nextp()), move conditional statements as close
    // together as possible
    static AstNode* optimize(AstNode* nodep, const StmtPropertiesAllocator& stmtProperties) {
        CodeMotionOptimizeVisitor{nodep, stmtProperties};
        // It is possible for the head of the list to be moved later such that it is no longer
        // in head position. If so, rewind the list and return the new head.
        while (nodep->backp()->nextp() == nodep) nodep = nodep->backp();
        return nodep;
    }
};
//######################################################################
// Conditional merging
class MergeCondVisitor final : public VNVisitor {
private:
// NODE STATE
// AstVar::user1 -> Flag set for variables referenced by m_mgCondp
// AstNode::user2 -> Flag marking node as included in merge because cheap to duplicate
const VNUser1InUse m_user1InUse;
const VNUser2InUse m_user2InUse;
// AstVar::user1 -> bool: Set for variables referenced by m_mgCondp
// (Only below MergeCondVisitor::process).
// AstNode::user2 -> bool: Marking node as included in merge because cheap to
// duplicate
// (Only below MergeCondVisitor::process).
// AstNodeStmt::user3 -> StmtProperties
// (Only below MergeCondVisitor::process).
// AstNode::user4 -> See CodeMotionAnalysisVisitor/CodeMotionOptimizeVisitor
// AstNode::user5 -> See CodeMotionAnalysisVisitor
// STATE
VDouble0 m_statMerges; // Statistic tracking
@ -128,24 +449,84 @@ private:
const AstNode* m_mgNextp = nullptr; // Next node in list being examined
uint32_t m_listLenght = 0; // Length of current list
CheckMergeableVisitor m_checkMergeable; // Sub visitor for encapsulation & speed
std::queue<AstNode*>* m_workQueuep = nullptr; // Node lists (via AstNode::nextp()) to merge
// Statement properties for code motion and merging
StmtPropertiesAllocator* m_stmtPropertiesp = nullptr;
// METHODS
VL_DEBUG_FUNC; // Declare debug()
// This function extracts the Cond node from the RHS, if there is one and
// it is in a supported position, which are:
// - RHS is the Cond
// - RHS is And(Const, Cond). This And is inserted often by V3Clean.
static AstNodeCond* extractCond(AstNode* rhsp) {
if (AstNodeCond* const condp = VN_CAST(rhsp, NodeCond)) {
return condp;
} else if (const AstAnd* const andp = VN_CAST(rhsp, And)) {
if (AstNodeCond* const condp = VN_CAST(andp->rhsp(), NodeCond)) {
if (VN_IS(andp->lhsp(), Const)) return condp;
}
// Function that processes a whole sub-tree
void process(AstNode* nodep) {
// Set up work queue
std::queue<AstNode*> workQueue;
m_workQueuep = &workQueue;
m_workQueuep->push(nodep);
do {
// Set up user* for this iteration
const VNUser1InUse user1InUse;
const VNUser2InUse user2InUse;
const VNUser3InUse user3InUse;
// Statement properties only preserved for this iteration,
// then memory is released immediately.
StmtPropertiesAllocator stmtProperties;
m_stmtPropertiesp = &stmtProperties;
// Pop off current work item
AstNode* currp = m_workQueuep->front();
m_workQueuep->pop();
// Analyse sub-tree list for code motion
CodeMotionAnalysisVisitor::analyze(currp, stmtProperties);
// Perform the code motion within the whole sub-tree list
currp = CodeMotionOptimizeVisitor::optimize(currp, stmtProperties);
// Merge conditionals in the whole sub-tree list (this might create new work items)
iterateAndNextNull(currp);
// Close pending merge, if there is one at the end of the whole sub-tree list
if (m_mgFirstp) mergeEnd();
} while (!m_workQueuep->empty());
}
// Skip past AstArraySel and AstWordSel nodes with constant indices, returning the
// expression they select from. Stops early (returning the current node) at the first
// select with a non-constant index, as such a select might be expensive to duplicate.
static AstNode* skipConstSels(AstNode* nodep) {
    while (const AstArraySel* const aselp = VN_CAST(nodep, ArraySel)) {
        // ArraySel index is not constant, so might be expensive
        if (!VN_IS(aselp->bitp(), Const)) return nodep;
        nodep = aselp->fromp();
    }
    while (const AstWordSel* const wselp = VN_CAST(nodep, WordSel)) {
        // WordSel index is not constant, so might be expensive
        if (!VN_IS(wselp->bitp(), Const)) return nodep;
        nodep = wselp->fromp();
    }
    return nodep;
}
// Check if this node is cheap enough that duplicating it in two branches of an
// AstIf is not likely to cause a performance degradation.
static bool isCheapNode(AstNode* nodep) {
    // Comments cost nothing to duplicate
    if (VN_IS(nodep, Comment)) return true;
    // Of other statements, only simple assignments are considered cheap:
    // both sides must reduce (through constant-index selects) to trivial terms.
    const AstNodeAssign* const assignp = VN_CAST(nodep, NodeAssign);
    if (!assignp) return false;
    // LHS must reduce to a plain variable reference
    if (!VN_IS(skipConstSels(assignp->lhsp()), VarRef)) return false;
    // RHS must reduce to a variable reference or a constant
    AstNode* const rhsp = skipConstSels(assignp->rhsp());
    return VN_IS(rhsp, VarRef) || VN_IS(rhsp, Const);
}
// Predicate to check if an expression yields only 0 or 1 (i.e.: a 1-bit value)
@ -196,23 +577,21 @@ private:
static AstNode* maskLsb(AstNode* nodep) {
if (yieldsOneOrZero(nodep)) return nodep;
// Otherwise apply masking
AstNode* const maskp = new AstConst(nodep->fileline(), AstConst::BitTrue());
AstNode* const maskp = new AstConst{nodep->fileline(), AstConst::BitTrue()};
// Mask on left, as conventional
return new AstAnd(nodep->fileline(), maskp, nodep);
return new AstAnd{nodep->fileline(), maskp, nodep};
}
// Fold the RHS expression assuming the given condition state. Unlink bits
// from the RHS which is only used once, and can be reused. What remains
// of the RHS is expected to be deleted by the caller.
// Fold the RHS expression of an assignment assuming the given condition state.
// Unlink bits from the RHS which is only used once, and can be reused (is an unmodified
// sub-tree). What remains of the RHS is expected to be deleted by the caller.
AstNode* foldAndUnlink(AstNode* rhsp, bool condTrue) {
if (rhsp->sameTree(m_mgCondp)) {
return new AstConst(rhsp->fileline(), AstConst::BitTrue{}, condTrue);
} else if (const AstNodeCond* const condp = extractCond(rhsp)) {
return new AstConst{rhsp->fileline(), AstConst::BitTrue{}, condTrue};
} else if (const AstNodeCond* const condp = extractCondFromRhs(rhsp)) {
AstNode* const resp
= condTrue ? condp->expr1p()->unlinkFrBack() : condp->expr2p()->unlinkFrBack();
if (condp == rhsp) { //
return resp;
}
if (condp == rhsp) return resp;
if (const AstAnd* const andp = VN_CAST(rhsp, And)) {
UASSERT_OBJ(andp->rhsp() == condp, rhsp, "Should not try to fold this");
return new AstAnd{andp->fileline(), andp->lhsp()->cloneTree(false), resp};
@ -227,17 +606,18 @@ private:
return condTrue ? maskLsb(andp->lhsp()->unlinkFrBack())
: new AstConst{rhsp->fileline(), AstConst::BitFalse()};
}
} else if (VN_IS(rhsp, WordSel) || VN_IS(rhsp, VarRef) || VN_IS(rhsp, Const)) {
} else if (VN_IS(rhsp, ArraySel) || VN_IS(rhsp, WordSel) || VN_IS(rhsp, VarRef)
|| VN_IS(rhsp, Const)) {
return rhsp->cloneTree(false);
}
rhsp->dumpTree("Don't know how to fold expression: ");
rhsp->v3fatalSrc("Don't know how to fold expression");
// LCOV_EXCL_START
if (debug()) rhsp->dumpTree("Don't know how to fold expression: ");
rhsp->v3fatalSrc("Should not try to fold this during conditional merging");
// LCOV_EXCL_STOP
}
void mergeEnd(int lineno) {
UASSERT(m_mgFirstp, "mergeEnd without list " << lineno);
// We might want to recursively merge an AstIf. We stash it in this variable.
const AstNodeIf* recursivep = nullptr;
void mergeEnd() {
UASSERT(m_mgFirstp, "mergeEnd without list");
// Drop leading cheap nodes. These were only added in the hope of finding
// an earlier reduced form, but we failed to do so.
while (m_mgFirstp->user2() && m_mgFirstp != m_mgLastp) {
@ -254,8 +634,11 @@ private:
m_mgLastp = m_mgLastp->backp();
--m_listLenght;
UASSERT_OBJ(m_mgLastp && m_mgLastp->nextp() == nextp, m_mgFirstp,
"Cheap assignment should not be at the front of the list");
"Cheap statement should not be at the front of the list");
}
// If the list contains a single AstNodeIf, we will want to merge its branches.
// If so, keep hold of the AstNodeIf in this variable.
AstNodeIf* recursivep = nullptr;
// Merge if list is longer than one node
if (m_mgFirstp != m_mgLastp) {
UINFO(6, "MergeCond - First: " << m_mgFirstp << " Last: " << m_mgLastp << endl);
@ -266,7 +649,7 @@ private:
// and we also need to keep track of it for comparisons later.
m_mgCondp = m_mgCondp->cloneTree(false);
// Create equivalent 'if' statement and insert it before the first node
AstIf* const resultp = new AstIf(m_mgCondp->fileline(), m_mgCondp);
AstIf* const resultp = new AstIf{m_mgCondp->fileline(), m_mgCondp};
m_mgFirstp->addHereThisAsNext(resultp);
// Unzip the list and insert under branches
AstNode* nextp = m_mgFirstp;
@ -308,10 +691,12 @@ private:
VL_DO_DANGLING(ifp->deleteTree(), ifp);
}
} while (nextp);
// Recursively merge the resulting AstIf
recursivep = resultp;
} else if (const AstNodeIf* const ifp = VN_CAST(m_mgFirstp, NodeIf)) {
// There was nothing to merge this AstNodeIf with, but try to merge it's branches
// Merge the branches of the resulting AstIf after re-analysis
if (resultp->ifsp()) m_workQueuep->push(resultp->ifsp());
if (resultp->elsesp()) m_workQueuep->push(resultp->elsesp());
} else if (AstNodeIf* const ifp = VN_CAST(m_mgFirstp, NodeIf)) {
// There was nothing to merge this AstNodeIf with, so try to merge its branches.
// No re-analysis is required for this, so do it directly below
recursivep = ifp;
}
// Reset state
@ -321,14 +706,13 @@ private:
m_mgNextp = nullptr;
AstNode::user1ClearTree(); // Clear marked variables
AstNode::user2ClearTree();
// Merge recursively within the branches
// Merge recursively within the branches of an un-merged AstNodeIf
if (recursivep) {
iterateAndNextNull(recursivep->ifsp());
// Close list, if there is one at the end of the then branch
if (m_mgFirstp) mergeEnd(__LINE__);
iterateAndNextNull(recursivep->elsesp());
// Close list, if there is one at the end of the else branch
if (m_mgFirstp) mergeEnd(__LINE__);
// Close a pending merge to ensure merge state is
// reset as expected at the end of this function
if (m_mgFirstp) mergeEnd();
}
}
@ -351,47 +735,16 @@ private:
return false;
}
// Check if this node is cheap enough that duplicating it in two branches of an
// AstIf and is hence not likely to cause a performance degradation if doing so.
bool isCheapNode(AstNode* nodep) const {
if (VN_IS(nodep, Comment)) return true;
if (const AstNodeAssign* const assignp = VN_CAST(nodep, NodeAssign)) {
// Check LHS
AstNode* lhsp = assignp->lhsp();
while (AstWordSel* const wselp = VN_CAST(lhsp, WordSel)) {
// WordSel index is not constant, so might be expensive
if (!VN_IS(wselp->bitp(), Const)) return false;
lhsp = wselp->fromp();
}
// LHS is not a VarRef, so might be expensive
if (!VN_IS(lhsp, VarRef)) return false;
// Check RHS
AstNode* rhsp = assignp->rhsp();
while (AstWordSel* const wselp = VN_CAST(rhsp, WordSel)) {
// WordSel index is not constant, so might be expensive
if (!VN_IS(wselp->bitp(), Const)) return false;
rhsp = wselp->fromp();
}
// RHS is not a VarRef or Constant so might be expensive
if (!VN_IS(rhsp, VarRef) && !VN_IS(rhsp, Const)) return false;
// Otherwise it is a cheap assignment
return true;
}
return false;
}
bool addToList(AstNode* nodep, AstNode* condp, int line) {
bool addToList(AstNodeStmt* nodep, AstNode* condp) {
// Set up head of new list if node is first in list
if (!m_mgFirstp) {
UASSERT_OBJ(condp, nodep, "Cannot start new list without condition " << line);
UASSERT_OBJ(condp, nodep, "Cannot start new list without condition");
// Mark variable references in the condition
condp->foreach<AstVarRef>([](const AstVarRef* nodep) { nodep->varp()->user1(1); });
// Now check again if mergeable. We need this to pick up assignments to conditions,
// e.g.: 'c = c ? a : b' at the beginning of the list, which is in fact not mergeable
// because it updates the condition. We simply bail on these.
if (m_checkMergeable(nodep) != Mergeable::YES) {
if ((*m_stmtPropertiesp)(nodep).writesConditionVar()) {
// Clear marked variables
AstNode::user1ClearTree();
// We did not add to the list
@ -400,11 +753,13 @@ private:
m_mgFirstp = nodep;
m_mgCondp = condp;
m_listLenght = 0;
// Add any preceding nodes to the list that would allow us to extend the merge range
for (;;) {
AstNode* const backp = m_mgFirstp->backp();
// Add any preceding nodes to the list that would allow us to extend the merge
// range
while (true) {
AstNodeStmt* const backp = VN_CAST(m_mgFirstp->backp(), NodeStmt);
if (!backp || backp->nextp() != m_mgFirstp) break; // Don't move up the tree
if (m_checkMergeable(backp) != Mergeable::YES) break;
const StmtProperties& props = (*m_stmtPropertiesp)(backp);
if (props.m_isFence || props.writesConditionVar()) break;
if (isSimplifiableNode(backp)) {
++m_listLenght;
m_mgFirstp = backp;
@ -424,59 +779,53 @@ private:
// Set up expected next node in list.
m_mgNextp = nodep->nextp();
// If last under parent, done with current list
if (!m_mgNextp) mergeEnd(__LINE__);
if (!m_mgNextp) mergeEnd();
// We did add to the list
return true;
}
// If this node is the next expected node and is helpful to add to the list, do so,
// otherwise end the current merge. Return true if added, false if ended merge.
bool addIfHelpfulElseEndMerge(AstNode* nodep) {
bool addIfHelpfulElseEndMerge(AstNodeStmt* nodep) {
UASSERT_OBJ(m_mgFirstp, nodep, "List must be open");
if (m_mgNextp == nodep) {
if (isSimplifiableNode(nodep)) {
if (addToList(nodep, nullptr, __LINE__)) return true;
if (addToList(nodep, nullptr)) return true;
} else if (isCheapNode(nodep)) {
nodep->user2(1);
if (addToList(nodep, nullptr, __LINE__)) return true;
if (addToList(nodep, nullptr)) return true;
}
}
// Not added to list, so we are done with the current list
mergeEnd(__LINE__);
mergeEnd();
return false;
}
bool checkOrMakeMergeable(AstNode* nodep) {
const Mergeable reason = m_checkMergeable(nodep);
// If meregeable, we are done
if (reason == Mergeable::YES) return true;
// Node not mergeable.
// If no current list, then this node is just special, move on.
if (!m_mgFirstp) return false;
// Otherwise finish current list
mergeEnd(__LINE__);
// If a tree was not mergeable due to an assignment to a condition,
// then finishing the current list makes it mergeable again.
return reason == Mergeable::NO_COND_ASSIGN;
bool checkOrMakeMergeable(const AstNodeStmt* nodep) {
const StmtProperties& props = (*m_stmtPropertiesp)(nodep);
if (props.m_isFence) return false; // Fence node never mergeable
// If the statement writes a condition variable of a pending merge,
// we must end the pending merge
if (m_mgFirstp && props.writesConditionVar()) mergeEnd();
return true; // Now surely mergeable
}
void mergeEndIfIncompatible(AstNode* nodep, AstNode* condp) {
void mergeEndIfIncompatible(const AstNode* nodep, const AstNode* condp) {
if (m_mgFirstp && (m_mgNextp != nodep || !condp->sameTree(m_mgCondp))) {
// Node in different list, or has different condition. Finish current list.
mergeEnd(__LINE__);
mergeEnd();
}
}
// VISITORS
virtual void visit(AstNodeAssign* nodep) override {
AstNode* const rhsp = nodep->rhsp();
if (const AstNodeCond* const condp = extractCond(rhsp)) {
if (AstNode* const condp = (*m_stmtPropertiesp)(nodep).m_condp) {
// Check if mergeable
if (!checkOrMakeMergeable(nodep)) return;
// Close potentially incompatible pending merge
mergeEndIfIncompatible(nodep, condp->condp());
mergeEndIfIncompatible(nodep, condp);
// Add current node
addToList(nodep, condp->condp(), __LINE__);
addToList(nodep, condp);
} else if (m_mgFirstp) {
addIfHelpfulElseEndMerge(nodep);
}
@ -493,21 +842,22 @@ private:
// Close potentially incompatible pending merge
mergeEndIfIncompatible(nodep, nodep->condp());
// Add current node
addToList(nodep, nodep->condp(), __LINE__);
addToList(nodep, nodep->condp());
}
virtual void visit(AstNodeStmt* nodep) override {
if (m_mgFirstp && addIfHelpfulElseEndMerge(nodep)) return;
iterateChildren(nodep);
}
virtual void visit(AstCFunc* nodep) override {
// Merge function body
if (nodep->stmtsp()) process(nodep->stmtsp());
}
// For speed, only iterate what is necessary.
virtual void visit(AstNetlist* nodep) override { iterateAndNextNull(nodep->modulesp()); }
virtual void visit(AstNodeModule* nodep) override { iterateAndNextNull(nodep->stmtsp()); }
virtual void visit(AstCFunc* nodep) override {
iterateChildren(nodep);
// Close list, if there is one at the end of the function
if (m_mgFirstp) mergeEnd(__LINE__);
}
virtual void visit(AstNodeStmt* nodep) override {
if (m_mgFirstp && addIfHelpfulElseEndMerge(nodep)) return;
iterateChildren(nodep);
}
virtual void visit(AstNode* nodep) override {}
public:
@ -520,6 +870,8 @@ public:
}
};
} // namespace
//######################################################################
// MergeConditionals class functions

View File

@ -30,6 +30,7 @@ struct V3OptionParser::Impl {
// Setting for isOnOffAllowed() and isPartialMatchAllowed()
enum class en : uint8_t {
NONE, // "-opt"
FONOFF, // "-fopt" and "-fno-opt"
ONOFF, // "-opt" and "-no-opt"
VALUE // "-opt val"
};
@ -39,6 +40,7 @@ struct V3OptionParser::Impl {
bool m_undocumented = false; // This option is not documented
public:
virtual bool isValueNeeded() const override final { return MODE == en::VALUE; }
virtual bool isFOnOffAllowed() const override final { return MODE == en::FONOFF; }
virtual bool isOnOffAllowed() const override final { return MODE == en::ONOFF; }
virtual bool isPartialMatchAllowed() const override final { return ALLOW_PARTIAL_MATCH; }
virtual bool isUndocumented() const override { return m_undocumented; }
@ -47,6 +49,7 @@ struct V3OptionParser::Impl {
// Actual action classes
template <typename T> class ActionSet; // "-opt" for bool-ish, "-opt val" for int and string
template <typename BOOL> class ActionFOnOff; // "-fopt" and "-fno-opt" for bool-ish
template <typename BOOL> class ActionOnOff; // "-opt" and "-no-opt" for bool-ish
class ActionCbCall; // Callback without argument for "-opt"
class ActionCbOnOff; // Callback for "-opt" and "-no-opt"
@ -80,6 +83,7 @@ V3OPTION_PARSER_DEF_ACT_CLASS(ActionSet, VOptionBool, m_valp->setTrueOrFalse(tru
V3OPTION_PARSER_DEF_ACT_CLASS(ActionSet, int, *m_valp = std::atoi(argp), en::VALUE);
V3OPTION_PARSER_DEF_ACT_CLASS(ActionSet, string, *m_valp = argp, en::VALUE);
V3OPTION_PARSER_DEF_ACT_CLASS(ActionFOnOff, bool, *m_valp = !hasPrefixFNo(optp), en::FONOFF);
V3OPTION_PARSER_DEF_ACT_CLASS(ActionOnOff, bool, *m_valp = !hasPrefixNo(optp), en::ONOFF);
#ifndef V3OPTION_PARSER_NO_VOPTION_BOOL
V3OPTION_PARSER_DEF_ACT_CLASS(ActionOnOff, VOptionBool, m_valp->setTrueOrFalse(!hasPrefixNo(optp)),
@ -117,12 +121,23 @@ V3OPTION_PARSER_DEF_ACT_CB_CLASS(ActionCbPartialMatchVal, void(const char*, cons
V3OptionParser::ActionIfs* V3OptionParser::find(const char* optp) {
const auto it = m_pimpl->m_options.find(optp);
if (it != m_pimpl->m_options.end()) return it->second.get();
if (it != m_pimpl->m_options.end()) return it->second.get(); // Exact match
for (auto&& act : m_pimpl->m_options) {
if (act.second->isFOnOffAllowed()) { // Find starts with "-fno"
if (const char* const nop
= VString::startsWith(optp, "-fno-") ? (optp + strlen("-fno-")) : nullptr) {
if (act.first.substr(strlen("-f"), std::string::npos)
== nop) { // [-f]opt = [-fno-]opt
return act.second.get();
}
}
}
if (act.second->isOnOffAllowed()) { // Find starts with "-no"
const char* const nop = VString::startsWith(optp, "-no") ? (optp + 3) : nullptr;
if (nop && (act.first == nop || act.first == (string{"-"} + nop))) {
return act.second.get();
if (const char* const nop
= VString::startsWith(optp, "-no") ? (optp + strlen("-no")) : nullptr) {
if (act.first == nop || act.first == (string{"-"} + nop)) {
return act.second.get();
}
}
} else if (act.second->isPartialMatchAllowed()) {
if (VString::startsWith(optp, act.first)) return act.second.get();
@ -143,6 +158,12 @@ V3OptionParser::ActionIfs& V3OptionParser::add(const std::string& opt, ARG arg)
return *insertedResult.first->second;
}
bool V3OptionParser::hasPrefixFNo(const char* strp) {
    // True when the option begins with "-fno" (a leading "--" is treated as "-")
    UASSERT(strp[0] == '-', strp << " does not start with '-'");
    const char* const onedashp = (strp[1] == '-') ? strp + 1 : strp;
    return VString::startsWith(onedashp, "-fno");
}
bool V3OptionParser::hasPrefixNo(const char* strp) {
UASSERT(strp[0] == '-', strp << " does not start with '-'");
if (strp[1] == '-') ++strp;
@ -178,6 +199,10 @@ void V3OptionParser::finalize() {
for (auto&& opt : m_pimpl->m_options) {
if (opt.second->isUndocumented()) continue;
m_pimpl->m_spellCheck.pushCandidate(opt.first);
if (opt.second->isFOnOffAllowed()) {
m_pimpl->m_spellCheck.pushCandidate(
"-fno-" + opt.first.substr(strlen("-f"), std::string::npos));
}
if (opt.second->isOnOffAllowed()) m_pimpl->m_spellCheck.pushCandidate("-no" + opt.first);
}
m_pimpl->m_isFinalized = true;
@ -202,6 +227,7 @@ V3OPTION_PARSER_DEF_OP(Set, VOptionBool*, ActionSet<VOptionBool>)
#endif
V3OPTION_PARSER_DEF_OP(Set, int*, ActionSet<int>)
V3OPTION_PARSER_DEF_OP(Set, string*, ActionSet<string>)
V3OPTION_PARSER_DEF_OP(FOnOff, bool*, ActionFOnOff<bool>)
V3OPTION_PARSER_DEF_OP(OnOff, bool*, ActionOnOff<bool>)
#ifndef V3OPTION_PARSER_NO_VOPTION_BOOL
V3OPTION_PARSER_DEF_OP(OnOff, VOptionBool*, ActionOnOff<VOptionBool>)

View File

@ -66,6 +66,7 @@ private:
// METHODS
ActionIfs* find(const char* optp);
template <class ACT, class ARG> ActionIfs& add(const string& opt, ARG arg);
static bool hasPrefixFNo(const char* strp); // Returns true if strp starts with "-fno"
static bool hasPrefixNo(const char* strp); // Returns true if strp starts with "-no"
public:
@ -87,6 +88,7 @@ class V3OptionParser::ActionIfs VL_NOT_FINAL {
public:
virtual ~ActionIfs() = default;
virtual bool isValueNeeded() const = 0; // Need val of "-opt val"
virtual bool isFOnOffAllowed() const = 0; // true if "-fno-opt" is allowd
virtual bool isOnOffAllowed() const = 0; // true if "-no-opt" is allowd
virtual bool isPartialMatchAllowed() const = 0; // true if "-Wno-" matches "-Wno-fatal"
virtual bool isUndocumented() const = 0; // Will not be suggested in typo
@ -101,13 +103,15 @@ class V3OptionParser::AppendHelper final {
public:
// TYPES
// Tag to specify which operator() to call
struct Set {}; // For ActionSet
struct FOnOff {}; // For ActionFOnOff
struct OnOff {}; // For ActionOnOff
struct Set {}; // For ActionSet
struct CbCall {}; // For ActionCbCall
struct CbOnOff {}; // For ActionOnOff
struct CbVal {}; // For ActionCbVal
struct CbOnOff {}; // For ActionOnOff of ActionFOnOff
struct CbPartialMatch {}; // For ActionCbPartialMatch
struct CbPartialMatchVal {}; // For ActionCbPartialMatchVal
struct CbVal {}; // For ActionCbVal
private:
// MEMBERS
@ -122,6 +126,7 @@ public:
ActionIfs& operator()(const char* optp, Set, int*) const;
ActionIfs& operator()(const char* optp, Set, string*) const;
ActionIfs& operator()(const char* optp, FOnOff, bool*) const;
ActionIfs& operator()(const char* optp, OnOff, bool*) const;
#ifndef V3OPTION_PARSER_NO_VOPTION_BOOL
ActionIfs& operator()(const char* optp, OnOff, VOptionBool*) const;
@ -144,13 +149,14 @@ public:
#define V3OPTION_PARSER_DECL_TAGS \
const auto Set VL_ATTR_UNUSED = V3OptionParser::AppendHelper::Set{}; \
const auto FOnOff VL_ATTR_UNUSED = V3OptionParser::AppendHelper::FOnOff{}; \
const auto OnOff VL_ATTR_UNUSED = V3OptionParser::AppendHelper::OnOff{}; \
const auto CbCall VL_ATTR_UNUSED = V3OptionParser::AppendHelper::CbCall{}; \
const auto CbOnOff VL_ATTR_UNUSED = V3OptionParser::AppendHelper::CbOnOff{}; \
const auto CbVal VL_ATTR_UNUSED = V3OptionParser::AppendHelper::CbVal{}; \
const auto CbPartialMatch VL_ATTR_UNUSED = V3OptionParser::AppendHelper::CbPartialMatch{}; \
const auto CbPartialMatchVal VL_ATTR_UNUSED \
= V3OptionParser::AppendHelper::CbPartialMatchVal {}
= V3OptionParser::AppendHelper::CbPartialMatchVal{}; \
const auto CbVal VL_ATTR_UNUSED = V3OptionParser::AppendHelper::CbVal{};
//######################################################################

View File

@ -775,8 +775,16 @@ void V3Options::notify() {
&& !v3Global.opt.xmlOnly());
}
// --trace-threads implies --threads 1 unless explicitly specified
if (traceThreads() && !threads()) m_threads = 1;
if (trace()) {
// With --trace-fst, --trace-threads implies --threads 1 unless explicitly specified
if (traceFormat().fst() && traceThreads() && !threads()) m_threads = 1;
// With --trace, --trace-threads is ignored
if (traceFormat().vcd()) m_traceThreads = threads() ? 1 : 0;
}
UASSERT(!(useTraceParallel() && useTraceOffload()),
"Cannot use both parallel and offloaded tracing");
// Default split limits if not specified
if (m_outputSplitCFuncs < 0) m_outputSplitCFuncs = m_outputSplit;
@ -1075,6 +1083,28 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
});
DECL_OPTION("-flatten", OnOff, &m_flatten);
DECL_OPTION("-facyc-simp", FOnOff, &m_fAcycSimp);
DECL_OPTION("-fassemble", FOnOff, &m_fAssemble);
DECL_OPTION("-fcase", FOnOff, &m_fCase);
DECL_OPTION("-fcombine", FOnOff, &m_fCombine);
DECL_OPTION("-fconst", FOnOff, &m_fConst);
DECL_OPTION("-fconst-bit-op-tree", FOnOff, &m_fConstBitOpTree);
DECL_OPTION("-fdedup", FOnOff, &m_fDedupe);
DECL_OPTION("-fexpand", FOnOff, &m_fExpand);
DECL_OPTION("-fgate", FOnOff, &m_fGate);
DECL_OPTION("-finline", FOnOff, &m_fInline);
DECL_OPTION("-flife", FOnOff, &m_fLife);
DECL_OPTION("-flife-post", FOnOff, &m_fLifePost);
DECL_OPTION("-flocalize", FOnOff, &m_fLocalize);
DECL_OPTION("-fmerge-cond", FOnOff, &m_fMergeCond);
DECL_OPTION("-fmerge-const-pool", FOnOff, &m_fMergeConstPool);
DECL_OPTION("-freloop", FOnOff, &m_fReloop);
DECL_OPTION("-freorder", FOnOff, &m_fReorder);
DECL_OPTION("-fsplit", FOnOff, &m_fSplit);
DECL_OPTION("-fsubst", FOnOff, &m_fSubst);
DECL_OPTION("-fsubst-const", FOnOff, &m_fSubstConst);
DECL_OPTION("-ftable", FOnOff, &m_fTable);
DECL_OPTION("-G", CbPartialMatch, [this](const char* optp) { addParameter(optp, false); });
DECL_OPTION("-gate-stmts", Set, &m_gateStmts);
DECL_OPTION("-gdb", CbCall, []() {}); // Processed only in bin/verilator shell
@ -1144,50 +1174,51 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
}
});
DECL_OPTION("-max-num-width", Set, &m_maxNumWidth);
DECL_OPTION("-merge-const-pool", OnOff, &m_mergeConstPool);
DECL_OPTION("-mod-prefix", Set, &m_modPrefix);
DECL_OPTION("-O", CbPartialMatch, [this](const char* optp) {
// Optimization
DECL_OPTION("-O0", CbCall, [this]() { optimize(0); });
DECL_OPTION("-O1", CbCall, [this]() { optimize(1); });
DECL_OPTION("-O2", CbCall, [this]() { optimize(2); });
DECL_OPTION("-O3", CbCall, [this]() { optimize(3); });
DECL_OPTION("-O", CbPartialMatch, [this, fl](const char* optp) {
// Optimization, e.g. -O1rX
// LCOV_EXCL_START
fl->v3warn(DEPRECATED, "Option -O<letter> is deprecated. "
"Use -f<optimization> or -fno-<optimization> instead.");
for (const char* cp = optp; *cp; ++cp) {
const bool flag = isupper(*cp);
switch (tolower(*cp)) {
case '0': optimize(0); break; // 0=all off
case '1': optimize(1); break; // 1=all on
case '2': optimize(2); break; // 2=not used
case '3': optimize(3); break; // 3=high
case 'a': m_oTable = flag; break;
case 'b': m_oCombine = flag; break;
case 'c': m_oConst = flag; break;
case 'd': m_oDedupe = flag; break;
case 'e': m_oCase = flag; break;
// f
case 'g': m_oGate = flag; break;
// h
case 'i': m_oInline = flag; break;
// j
case 'k': m_oSubstConst = flag; break;
case 'l': m_oLife = flag; break;
case 'm': m_oAssemble = flag; break;
// n
case 'o':
m_oConstBitOpTree = flag;
break; // Can remove ~2022-01 when stable
// o will be used as an escape for a second character of optimization disables
case '0': optimize(0); break;
case '1': optimize(1); break;
case '2': optimize(2); break;
case '3': optimize(3); break;
case 'a': m_fTable = flag; break; // == -fno-table
case 'b': m_fCombine = flag; break; // == -fno-combine
case 'c': m_fConst = flag; break; // == -fno-const
case 'd': m_fDedupe = flag; break; // == -fno-dedup
case 'e': m_fCase = flag; break; // == -fno-case
case 'g': m_fGate = flag; break; // == -fno-gate
case 'i': m_fInline = flag; break; // == -fno-inline
case 'k': m_fSubstConst = flag; break; // == -fno-subst-const
case 'l': m_fLife = flag; break; // == -fno-life
case 'm': m_fAssemble = flag; break; // == -fno-assemble
case 'o': m_fConstBitOpTree = flag; break; // == -fno-const-bit-op-tree
case 'p':
m_public = !flag;
break; // With -Op so flag=0, we want public on so few optimizations done
// q
case 'r': m_oReorder = flag; break;
case 's': m_oSplit = flag; break;
case 't': m_oLifePost = flag; break;
case 'u': m_oSubst = flag; break;
case 'v': m_oReloop = flag; break;
case 'w': m_oMergeCond = flag; break;
case 'x': m_oExpand = flag; break;
case 'y': m_oAcycSimp = flag; break;
case 'z': m_oLocalize = flag; break;
default: break; // No error, just ignore
case 'r': m_fReorder = flag; break; // == -fno-reorder
case 's': m_fSplit = flag; break; // == -fno-split
case 't': m_fLifePost = flag; break; // == -fno-life-post
case 'u': m_fSubst = flag; break; // == -fno-subst
case 'v': m_fReloop = flag; break; // == -fno-reloop
case 'w': m_fMergeCond = flag; break; // == -fno-merge-cond
case 'x': m_fExpand = flag; break; // == -fno-expand
case 'y': m_fAcycSimp = flag; break; // == -fno-acyc-simp
case 'z': m_fLocalize = flag; break; // == -fno-localize
default:
break; // No error, just ignore
// LCOV_EXCL_STOP
}
}
});
@ -1352,7 +1383,7 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, char
DECL_OPTION("-trace-threads", CbVal, [this, fl](const char* valp) {
m_trace = true;
m_traceThreads = std::atoi(valp);
if (m_traceThreads < 0) fl->v3fatal("--trace-threads must be >= 0: " << valp);
if (m_traceThreads < 1) fl->v3fatal("--trace-threads must be >= 1: " << valp);
});
DECL_OPTION("-trace-underscore", OnOff, &m_traceUnderscore);
@ -1781,26 +1812,26 @@ int V3Options::dumpTreeLevel(const string& srcfile_path) {
void V3Options::optimize(int level) {
// Set all optimizations to on/off
const bool flag = level > 0;
m_oAcycSimp = flag;
m_oAssemble = flag;
m_oCase = flag;
m_oCombine = flag;
m_oConst = flag;
m_oConstBitOpTree = flag;
m_oDedupe = flag;
m_oExpand = flag;
m_oGate = flag;
m_oInline = flag;
m_oLife = flag;
m_oLifePost = flag;
m_oLocalize = flag;
m_oMergeCond = flag;
m_oReloop = flag;
m_oReorder = flag;
m_oSplit = flag;
m_oSubst = flag;
m_oSubstConst = flag;
m_oTable = flag;
m_fAcycSimp = flag;
m_fAssemble = flag;
m_fCase = flag;
m_fCombine = flag;
m_fConst = flag;
m_fConstBitOpTree = flag;
m_fDedupe = flag;
m_fExpand = flag;
m_fGate = flag;
m_fInline = flag;
m_fLife = flag;
m_fLifePost = flag;
m_fLocalize = flag;
m_fMergeCond = flag;
m_fReloop = flag;
m_fReorder = flag;
m_fSplit = flag;
m_fSubst = flag;
m_fSubstConst = flag;
m_fTable = flag;
// And set specific optimization levels
if (level >= 3) {
m_inlineMult = -1; // Maximum inlining

View File

@ -246,7 +246,6 @@ private:
bool m_lintOnly = false; // main switch: --lint-only
bool m_gmake = false; // main switch: --make gmake
bool m_main = false; // main swithc: --main
bool m_mergeConstPool = true; // main switch: --merge-const-pool
bool m_outFormatOk = false; // main switch: --cc, --sc or --sp was specified
bool m_pedantic = false; // main switch: --Wpedantic
bool m_pinsScUint = false; // main switch: --pins-sc-uint
@ -340,27 +339,27 @@ private:
V3LangCode m_defaultLanguage; // main switch: --language
// MEMBERS (optimizations)
// // main switch: -Op: --public
bool m_oAcycSimp; // main switch: -Oy: acyclic pre-optimizations
bool m_oAssemble; // main switch: -Om: assign assemble
bool m_oCase; // main switch: -Oe: case tree conversion
bool m_oCombine; // main switch: -Ob: common icode packing
bool m_oConst; // main switch: -Oc: constant folding
bool m_oConstBitOpTree; // main switch: -Oo: constant bit op tree
bool m_oDedupe; // main switch: -Od: logic deduplication
bool m_oExpand; // main switch: -Ox: expansion of C macros
bool m_oGate; // main switch: -Og: gate wire elimination
bool m_oInline; // main switch: -Oi: module inlining
bool m_oLife; // main switch: -Ol: variable lifetime
bool m_oLifePost; // main switch: -Ot: delayed assignment elimination
bool m_oLocalize; // main switch: -Oz: convert temps to local variables
bool m_oMergeCond; // main switch: -Ob: merge conditionals
bool m_oReloop; // main switch: -Ov: reform loops
bool m_oReorder; // main switch: -Or: reorder assignments in blocks
bool m_oSplit; // main switch: -Os: always assignment splitting
bool m_oSubst; // main switch: -Ou: substitute expression temp values
bool m_oSubstConst; // main switch: -Ok: final constant substitution
bool m_oTable; // main switch: -Oa: lookup table creation
bool m_fAcycSimp; // main switch: -fno-acyc-simp: acyclic pre-optimizations
bool m_fAssemble; // main switch: -fno-assemble: assign assemble
bool m_fCase; // main switch: -fno-case: case tree conversion
bool m_fCombine; // main switch: -fno-combine: common icode packing
bool m_fConst; // main switch: -fno-const: constant folding
bool m_fConstBitOpTree; // main switch: -fno-const-bit-op-tree constant bit op tree
bool m_fDedupe; // main switch: -fno-dedupe: logic deduplication
bool m_fExpand; // main switch: -fno-expand: expansion of C macros
bool m_fGate; // main switch: -fno-gate: gate wire elimination
bool m_fInline; // main switch: -fno-inline: module inlining
bool m_fLife; // main switch: -fno-life: variable lifetime
bool m_fLifePost; // main switch: -fno-life-post: delayed assignment elimination
bool m_fLocalize; // main switch: -fno-localize: convert temps to local variables
bool m_fMergeCond; // main switch: -fno-merge-cond: merge conditionals
bool m_fMergeConstPool = true; // main switch: --fmerge-const-pool
bool m_fReloop; // main switch: -fno-reloop: reform loops
bool m_fReorder; // main switch: -fno-reorder: reorder assignments in blocks
bool m_fSplit; // main switch: -fno-split: always assignment splitting
bool m_fSubst; // main switch: -fno-subst: substitute expression temp values
bool m_fSubstConst; // main switch: -fno-subst-const: final constant substitution
bool m_fTable; // main switch: -fno-table: lookup table creation
// clang-format on
bool m_available = false; // Set to true at the end of option parsing
@ -458,7 +457,6 @@ public:
bool traceStructs() const { return m_traceStructs; }
bool traceUnderscore() const { return m_traceUnderscore; }
bool main() const { return m_main; }
bool mergeConstPool() const { return m_mergeConstPool; }
bool outFormatOk() const { return m_outFormatOk; }
bool keepTempFiles() const { return (V3Error::debugDefault() != 0); }
bool pedantic() const { return m_pedantic; }
@ -516,8 +514,10 @@ public:
int traceMaxArray() const { return m_traceMaxArray; }
int traceMaxWidth() const { return m_traceMaxWidth; }
int traceThreads() const { return m_traceThreads; }
bool useTraceOffloadThread() const {
return traceThreads() == 0 ? 0 : traceThreads() - traceFormat().fst();
bool useTraceOffload() const { return trace() && traceFormat().fst() && traceThreads() > 1; }
bool useTraceParallel() const { return trace() && traceFormat().vcd() && threads() > 1; }
unsigned vmTraceThreads() const {
return useTraceParallel() ? threads() : useTraceOffload() ? 1 : 0;
}
int unrollCount() const { return m_unrollCount; }
int unrollStmts() const { return m_unrollStmts; }
@ -571,26 +571,27 @@ public:
bool isNoClocker(const string& signame) const;
// ACCESSORS (optimization options)
bool oAcycSimp() const { return m_oAcycSimp; }
bool oAssemble() const { return m_oAssemble; }
bool oCase() const { return m_oCase; }
bool oCombine() const { return m_oCombine; }
bool oConst() const { return m_oConst; }
bool oConstBitOpTree() const { return m_oConstBitOpTree; }
bool oDedupe() const { return m_oDedupe; }
bool oExpand() const { return m_oExpand; }
bool oGate() const { return m_oGate; }
bool oInline() const { return m_oInline; }
bool oLife() const { return m_oLife; }
bool oLifePost() const { return m_oLifePost; }
bool oLocalize() const { return m_oLocalize; }
bool oMergeCond() const { return m_oMergeCond; }
bool oReloop() const { return m_oReloop; }
bool oReorder() const { return m_oReorder; }
bool oSplit() const { return m_oSplit; }
bool oSubst() const { return m_oSubst; }
bool oSubstConst() const { return m_oSubstConst; }
bool oTable() const { return m_oTable; }
bool fAcycSimp() const { return m_fAcycSimp; }
bool fAssemble() const { return m_fAssemble; }
bool fCase() const { return m_fCase; }
bool fCombine() const { return m_fCombine; }
bool fConst() const { return m_fConst; }
bool fConstBitOpTree() const { return m_fConstBitOpTree; }
bool fDedupe() const { return m_fDedupe; }
bool fExpand() const { return m_fExpand; }
bool fGate() const { return m_fGate; }
bool fInline() const { return m_fInline; }
bool fLife() const { return m_fLife; }
bool fLifePost() const { return m_fLifePost; }
bool fLocalize() const { return m_fLocalize; }
bool fMergeCond() const { return m_fMergeCond; }
bool fMergeConstPool() const { return m_fMergeConstPool; }
bool fReloop() const { return m_fReloop; }
bool fReorder() const { return m_fReorder; }
bool fSplit() const { return m_fSplit; }
bool fSubst() const { return m_fSubst; }
bool fSubstConst() const { return m_fSubstConst; }
bool fTable() const { return m_fTable; }
string traceClassBase() const { return m_traceFormat.classBase(); }
string traceClassLang() const { return m_traceFormat.classBase() + (systemC() ? "Sc" : "C"); }

View File

@ -133,7 +133,7 @@ private:
&& !constp->num().isString(); // Not a string
if (useConstPool) {
// Extract into constant pool.
const bool merge = v3Global.opt.mergeConstPool();
const bool merge = v3Global.opt.fMergeConstPool();
varp = v3Global.rootp()->constPoolp()->findConst(constp, merge)->varp();
nodep->deleteTree();
++m_extractedToConstPool;

View File

@ -180,6 +180,10 @@ private:
TraceActivityVertex* const m_alwaysVtxp; // "Always trace" vertex
bool m_finding = false; // Pass one of algorithm?
// Trace parallelism. Only VCD tracing can be parallelized at this time.
const uint32_t m_parallelism
= v3Global.opt.useTraceParallel() ? static_cast<uint32_t>(v3Global.opt.threads()) : 1;
VDouble0 m_statUniqSigs; // Statistic tracking
VDouble0 m_statUniqCodes; // Statistic tracking
@ -388,7 +392,7 @@ private:
if (!it->second->duplicatep()) {
uint32_t cost = 0;
const AstTraceDecl* const declp = it->second->nodep();
// The number of comparisons required by tracep->chg*
// The number of comparisons required by bufp->chg*
cost += declp->isWide() ? declp->codeInc() : 1;
// Arrays are traced by element
cost *= declp->arrayRange().ranged() ? declp->arrayRange().elements() : 1;
@ -494,7 +498,7 @@ private:
};
if (isTopFunc) {
// Top functions
funcp->argTypes("void* voidSelf, " + v3Global.opt.traceClassBase() + "* tracep");
funcp->argTypes("void* voidSelf, " + v3Global.opt.traceClassBase() + "::Buffer* bufp");
addInitStr(voidSelfAssign(m_topModp));
addInitStr(symClassAssign());
// Add global activity check to change dump functions
@ -508,32 +512,33 @@ private:
m_regFuncp->addStmtsp(new AstText(flp, "tracep->addChgCb(", true));
}
m_regFuncp->addStmtsp(new AstAddrOfCFunc(flp, funcp));
m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf);\n", true));
const string threadPool{m_parallelism > 1 ? "vlSymsp->__Vm_threadPoolp" : "nullptr"};
m_regFuncp->addStmtsp(new AstText(flp, ", vlSelf, " + threadPool + ");\n", true));
} else {
// Sub functions
funcp->argTypes(v3Global.opt.traceClassBase() + "* tracep");
funcp->argTypes(v3Global.opt.traceClassBase() + "::Buffer* bufp");
// Setup base references. Note in rare occasions we can end up with an empty trace
// sub function, hence the VL_ATTR_UNUSED attributes.
if (full) {
// Full dump sub function
addInitStr("uint32_t* const oldp VL_ATTR_UNUSED = "
"tracep->oldp(vlSymsp->__Vm_baseCode);\n");
"bufp->oldp(vlSymsp->__Vm_baseCode);\n");
} else {
// Change dump sub function
if (v3Global.opt.useTraceOffloadThread()) {
if (v3Global.opt.useTraceOffload()) {
addInitStr("const uint32_t base VL_ATTR_UNUSED = "
"vlSymsp->__Vm_baseCode + "
+ cvtToStr(baseCode) + ";\n");
addInitStr("if (false && tracep) {} // Prevent unused\n");
addInitStr("if (false && bufp) {} // Prevent unused\n");
} else {
addInitStr("uint32_t* const oldp VL_ATTR_UNUSED = "
"tracep->oldp(vlSymsp->__Vm_baseCode + "
"bufp->oldp(vlSymsp->__Vm_baseCode + "
+ cvtToStr(baseCode) + ");\n");
}
}
// Add call to top function
AstCCall* const callp = new AstCCall(funcp->fileline(), funcp);
callp->argTypes("tracep");
callp->argTypes("bufp");
topFuncp->addStmtsp(callp);
}
// Done
@ -728,7 +733,7 @@ private:
// We will split functions such that each have to dump roughly the same amount of data
// for this we need to keep tack of the number of codes used by the trace functions.
uint32_t nFullCodes = 0; // Number of non-duplicate codes (need to go into full* dump)
uint32_t nChgCodes = 0; // Number of non-consant codes (need to go in to chg* dump)
uint32_t nChgCodes = 0; // Number of non-constant codes (need to go in to chg* dump)
sortTraces(traces, nFullCodes, nChgCodes);
UINFO(5, "nFullCodes: " << nFullCodes << " nChgCodes: " << nChgCodes << endl);
@ -747,13 +752,11 @@ private:
m_regFuncp->isLoose(true);
m_topScopep->addActivep(m_regFuncp);
const int parallelism = 1; // Note: will bump this later, code below works for any value
// Create the full dump functions, also allocates signal numbers
createFullTraceFunction(traces, nFullCodes, parallelism);
createFullTraceFunction(traces, nFullCodes, m_parallelism);
// Create the incremental dump functions
createChgTraceFunctions(traces, nChgCodes, parallelism);
createChgTraceFunctions(traces, nChgCodes, m_parallelism);
// Remove refs to traced values from TraceDecl nodes, these have now moved under
// TraceInc

View File

@ -504,6 +504,7 @@ private:
// width: LHS + RHS
AstNodeDType* const vdtypep = m_vup->dtypeNullSkipRefp();
userIterate(vdtypep, WidthVP(SELF, BOTH).p());
// Conversions
if (VN_IS(vdtypep, QueueDType)) {
// Queue "element 0" is lhsp, so we need to swap arguments
auto* const newp = new AstConsQueue(nodep->fileline(), nodep->rhsp()->unlinkFrBack(),
@ -521,6 +522,16 @@ private:
userIterateChildren(newp, m_vup);
return;
}
if (VN_IS(vdtypep, UnpackArrayDType)) {
auto* const newp = new AstPattern{nodep->fileline(), nullptr};
patConcatConvertRecurse(newp, nodep);
nodep->replaceWith(newp);
VL_DO_DANGLING(pushDeletep(nodep), nodep);
userIterate(newp, m_vup);
return;
}
// Concat handling
if (m_vup->prelim()) {
if (VN_IS(vdtypep, AssocArrayDType) //
|| VN_IS(vdtypep, DynArrayDType) //
@ -662,7 +673,8 @@ private:
}
AstNodeDType* const vdtypep = m_vup->dtypeNullSkipRefp();
if (VN_IS(vdtypep, QueueDType) || VN_IS(vdtypep, DynArrayDType)) {
if (VN_IS(vdtypep, QueueDType) || VN_IS(vdtypep, DynArrayDType)
|| VN_IS(vdtypep, UnpackArrayDType)) {
if (times != 1)
nodep->v3warn(E_UNSUPPORTED, "Unsupported: Non-1 replication to form "
<< vdtypep->prettyDTypeNameQ()
@ -674,7 +686,7 @@ private:
VL_DO_DANGLING(pushDeletep(nodep), nodep);
return;
}
if (VN_IS(vdtypep, AssocArrayDType) || VN_IS(vdtypep, UnpackArrayDType)) {
if (VN_IS(vdtypep, AssocArrayDType)) {
nodep->v3warn(E_UNSUPPORTED, "Unsupported: Replication to form "
<< vdtypep->prettyDTypeNameQ() << " data type");
}
@ -6236,6 +6248,21 @@ private:
return patmap;
}
void patConcatConvertRecurse(AstPattern* patternp, AstConcat* nodep) {
if (AstConcat* lhsp = VN_CAST(nodep->lhsp(), Concat)) {
patConcatConvertRecurse(patternp, lhsp);
} else {
patternp->addItemsp(new AstPatMember{nodep->lhsp()->fileline(),
nodep->lhsp()->unlinkFrBack(), nullptr, nullptr});
}
if (AstConcat* rhsp = VN_CAST(nodep->rhsp(), Concat)) {
patConcatConvertRecurse(patternp, rhsp);
} else {
patternp->addItemsp(new AstPatMember{nodep->rhsp()->fileline(),
nodep->rhsp()->unlinkFrBack(), nullptr, nullptr});
}
}
void makeOpenArrayShell(AstNodeFTaskRef* nodep) {
UINFO(4, "Replicate openarray function " << nodep->taskp() << endl);
AstNodeFTask* const oldTaskp = nodep->taskp();

View File

@ -237,7 +237,7 @@ static void process() {
// Module inlining
// Cannot remove dead variables after this, as alias information for final
// V3Scope's V3LinkDot is in the AstVar.
if (v3Global.opt.oInline()) {
if (v3Global.opt.fInline()) {
V3Inline::inlineAll(v3Global.rootp());
V3LinkDot::linkDotArrayed(v3Global.rootp()); // Cleanup as made new modules
}
@ -308,11 +308,11 @@ static void process() {
// Push constants across variables and remove redundant assignments
V3Const::constifyAll(v3Global.rootp());
if (v3Global.opt.oLife()) V3Life::lifeAll(v3Global.rootp());
if (v3Global.opt.fLife()) V3Life::lifeAll(v3Global.rootp());
// Make large low-fanin logic blocks into lookup tables
// This should probably be done much later, once we have common logic elimination.
if (!v3Global.opt.lintOnly() && v3Global.opt.oTable()) {
if (!v3Global.opt.lintOnly() && v3Global.opt.fTable()) {
V3Table::tableAll(v3Global.rootp());
}
@ -326,7 +326,7 @@ static void process() {
V3Active::activeAll(v3Global.rootp());
// Split single ALWAYS blocks into multiple blocks for better ordering chances
if (v3Global.opt.oSplit()) V3Split::splitAlwaysAll(v3Global.rootp());
if (v3Global.opt.fSplit()) V3Split::splitAlwaysAll(v3Global.rootp());
V3SplitAs::splitAsAll(v3Global.rootp());
// Create tracing sample points, before we start eliminating signals
@ -338,11 +338,11 @@ static void process() {
// Gate-based logic elimination; eliminate signals and push constant across cell boundaries
// Instant propagation makes lots-o-constant reduction possibilities.
if (v3Global.opt.oGate()) {
if (v3Global.opt.fGate()) {
V3Gate::gateAll(v3Global.rootp());
// V3Gate calls constant propagation itself.
} else {
v3info("Command Line disabled gate optimization with -Og/-O0. "
v3info("Command Line disabled gate optimization with -fno-gate. "
"This may cause ordering problems.");
}
@ -361,7 +361,7 @@ static void process() {
}
// Reorder assignments in pipelined blocks
if (v3Global.opt.oReorder()) V3Split::splitReorderAll(v3Global.rootp());
if (v3Global.opt.fReorder()) V3Split::splitReorderAll(v3Global.rootp());
// Create delayed assignments
// This creates lots of duplicate ACTIVES so ActiveTop needs to be after this step
@ -383,12 +383,12 @@ static void process() {
// Cleanup any dly vars or other temps that are simple assignments
// Life must be done before Subst, as it assumes each CFunc under
// _eval is called only once.
if (v3Global.opt.oLife()) {
if (v3Global.opt.fLife()) {
V3Const::constifyAll(v3Global.rootp());
V3Life::lifeAll(v3Global.rootp());
}
if (v3Global.opt.oLifePost()) V3LifePost::lifepostAll(v3Global.rootp());
if (v3Global.opt.fLifePost()) V3LifePost::lifepostAll(v3Global.rootp());
// Remove unused vars
V3Const::constifyAll(v3Global.rootp());
@ -415,13 +415,13 @@ static void process() {
v3Global.assertScoped(false);
// Move variables from modules to function local variables where possible
if (v3Global.opt.oLocalize()) V3Localize::localizeAll(v3Global.rootp());
if (v3Global.opt.fLocalize()) V3Localize::localizeAll(v3Global.rootp());
// Remove remaining scopes; make varrefs/funccalls relative to current module
V3Descope::descopeAll(v3Global.rootp());
// Icache packing; combine common code in each module's functions into subroutines
if (v3Global.opt.oCombine()) V3Combine::combineAll(v3Global.rootp());
if (v3Global.opt.fCombine()) V3Combine::combineAll(v3Global.rootp());
}
V3Error::abortIfErrors();
@ -445,30 +445,30 @@ static void process() {
}
// Expand macros and wide operators into C++ primitives
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && v3Global.opt.oExpand()) {
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly() && v3Global.opt.fExpand()) {
V3Expand::expandAll(v3Global.rootp());
}
// Propagate constants across WORDSEL arrayed temporaries
if (!v3Global.opt.xmlOnly() && v3Global.opt.oSubst()) {
if (!v3Global.opt.xmlOnly() && v3Global.opt.fSubst()) {
// Constant folding of expanded stuff
V3Const::constifyCpp(v3Global.rootp());
V3Subst::substituteAll(v3Global.rootp());
}
if (!v3Global.opt.xmlOnly() && v3Global.opt.oSubstConst()) {
if (!v3Global.opt.xmlOnly() && v3Global.opt.fSubstConst()) {
// Constant folding of substitutions
V3Const::constifyCpp(v3Global.rootp());
V3Dead::deadifyAll(v3Global.rootp());
}
if (!v3Global.opt.lintOnly() && !v3Global.opt.xmlOnly()) {
if (v3Global.opt.oMergeCond()) {
if (v3Global.opt.fMergeCond()) {
// Merge conditionals
V3MergeCond::mergeAll(v3Global.rootp());
}
if (v3Global.opt.oReloop()) {
if (v3Global.opt.fReloop()) {
// Reform loops to reduce code size
// Must be after all Sel/array index based optimizations
V3Reloop::reloopAll(v3Global.rootp());

View File

@ -77,7 +77,6 @@ my $opt_gdbbt;
my $opt_gdbsim;
my $opt_hashset;
my $opt_jobs = 1;
my $opt_optimize;
my $opt_quiet;
my $opt_rerun;
my $opt_rrsim;
@ -104,7 +103,6 @@ if (! GetOptions(
"hashset=s" => \$opt_hashset,
"help" => \&usage,
"j=i" => \$opt_jobs,
"optimize:s" => \$opt_optimize,
"quiet!" => \$opt_quiet,
"rerun!" => \$opt_rerun,
"rr!" => \$opt_rr,
@ -661,7 +659,7 @@ sub new {
verilator_define => 'VERILATOR',
verilator_flags => ["-cc",
"-Mdir $self->{obj_dir}",
"-OD", # As currently disabled unless -O3
"--fdedup", # As currently disabled unless -O3
"--debug-check",
"--comp-limit-members 10", ],
verilator_flags2 => [],
@ -924,7 +922,6 @@ sub compile_vlt_flags {
unshift @verilator_flags, "--trace" if $opt_trace;
my $threads = ::calc_threads($Vltmt_threads);
unshift @verilator_flags, "--threads $threads" if $param{vltmt} && $checkflags !~ /-threads /;
unshift @verilator_flags, "--trace-threads 1" if $param{vltmt} && $checkflags =~ /-trace /;
unshift @verilator_flags, "--trace-threads 2" if $param{vltmt} && $checkflags =~ /-trace-fst /;
unshift @verilator_flags, "--debug-partition" if $param{vltmt};
unshift @verilator_flags, "-CFLAGS -ggdb -LDFLAGS -ggdb" if $opt_gdbsim;
@ -935,19 +932,6 @@ sub compile_vlt_flags {
$param{make_main} && $param{verilator_make_gmake};
unshift @verilator_flags, "../" . $self->{main_filename} if
$param{make_main} && $param{verilator_make_gmake};
if (defined $opt_optimize) {
my $letters = "";
if ($opt_optimize =~ /[a-zA-Z]/) {
$letters = $opt_optimize;
} else { # Randomly turn on/off different optimizations
foreach my $l ('a' .. 'z') {
$letters .= ((rand() > 0.5) ? $l : uc $l);
}
unshift @verilator_flags, "--trace" if rand() > 0.5;
unshift @verilator_flags, "--coverage" if rand() > 0.5;
}
unshift @verilator_flags, "--O" . $letters;
}
my @cmdargs = (
"--prefix " . $param{VM_PREFIX},
@ -2907,11 +2891,6 @@ Displays this message and program version and exits.
Run number of parallel tests, or 0 to determine the count based on the
number of cores installed. Requires Perl's Parallel::Forker package.
=item --optimize
Randomly turn on/off different optimizations. With specific flags,
use those optimization settings
=item --quiet
Suppress all output except for failures and progress messages every 15

View File

@ -15,7 +15,7 @@ top_filename("t/t_altera_lpm.v");
$module =~ s/_noinl//;
compile(
verilator_flags2 => ["--top-module ${module}", "-Oi"]
verilator_flags2 => ["--top-module ${module}", "-fno-inline"]
);
ok(1);

View File

@ -12,7 +12,7 @@ scenarios(vlt_all => 1);
top_filename("t/t_alw_reorder.v");
compile(
verilator_flags2 => ["--stats -Or"],
verilator_flags2 => ["--stats -fno-reorder"],
);
file_grep($Self->{stats}, qr/Optimizations, Split always\s+(\d+)/i, 0);

View File

@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
scenarios(simulator => 1);
compile(
verilator_flags2 => ["-O0 -OG"],
verilator_flags2 => ["-O0 -fgate"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t_assign_slice_overflow.v");
compile(
verilator_flags2 => ["-Ox"],
verilator_flags2 => ["-fno-expand"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_case_66bits.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
scenarios(simulator => 1);
compile(
verilator_flags2 => ["--trace --Os -x-assign 0"],
verilator_flags2 => ["--trace --fno-split -x-assign 0"],
);
execute(

View File

@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
scenarios(simulator => 1);
compile(
verilator_flags2 => ["--stats --O3 -x-assign fast"],
verilator_flags2 => ["--stats -O3 -x-assign fast"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_case_write1.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
scenarios(simulator => 1);
compile(
verilator_flags2 => ["--stats --O3 -x-assign fast"],
verilator_flags2 => ["--stats -O3 -x-assign fast"],
);
execute(

View File

@ -2,29 +2,20 @@
if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; }
# DESCRIPTION: Verilator: Verilog Test driver/expect definition
#
# Copyright 2003-2013 by Wilson Snyder. This program is free software; you
# Copyright 2022 by Wilson Snyder. This program is free software; you
# can redistribute it and/or modify it under the terms of either the GNU
# Lesser General Public License Version 3 or the Perl Artistic License
# Version 2.0.
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
scenarios(vlt => 1);
scenarios(simulator => 1);
compile(
make_top_shell => 0,
make_main => 0,
v_flags2 => ["--trace --exe $Self->{t_dir}/t_trace_c_api.cpp",
"-CFLAGS -DVERILATED_VCD_TEST",
"-CFLAGS -DVL_TRACE_VCD_OLD_API"],
);
execute(
check_finished => 1,
);
# vcddiff bug crashes
#vcd_identical("$Self->{obj_dir}/simx.vcd",
# $Self->{golden_filename});
ok(1);
1;

View File

@ -0,0 +1,36 @@
// DESCRIPTION: Verilator: Verilog Test module
//
// This file ONLY is placed under the Creative Commons Public Domain, for
// any use, without warranty, 2022 by Wilson Snyder.
// SPDX-License-Identifier: CC0-1.0
module t(/*AUTOARG*/
// Inputs
clk
);
input clk;
wire [31:0] arr [0:7];
assign arr[0:7] = {
{16'hffff, 16'h0000},
{16'h0000, 16'h0000},
{16'h0a0a, 16'h0000},
{16'ha0a0, 16'h0000},
{16'hffff, 16'h0000},
{16'h0000, 16'h0000},
{16'h0a0a, 16'h0000},
{16'ha0a0, 16'h0000}
};
int cyc = 0;
always @(posedge clk) begin
cyc <= cyc + 1;
if (cyc == 9) begin
if (arr[0] !== 32'hffff0000) $stop;
if (arr[7] !== 32'ha0a00000) $stop;
$write("*-* All Finished *-*\n");
$finish;
end
end
endmodule

View File

@ -13,7 +13,7 @@ top_filename("t/t_const_opt.v");
# Run the same design as t_const_opt.pl without bitopt tree optimization to make sure that the result is same.
compile(
verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats", "-Oo", "$Self->{t_dir}/t_const_opt.cpp"],
verilator_flags2 => ["-Wno-UNOPTTHREADS", "--stats", "-fno-const-bit-op-tree", "$Self->{t_dir}/t_const_opt.cpp"],
);
execute(

View File

@ -18,5 +18,8 @@ execute(
check_finished => 1,
);
if ($Self->{vlt}) {
file_grep($Self->{stats}, qr/Optimizations, Const bit op reduction\s+(\d+)/i, 11);
}
ok(1);
1;

View File

@ -4,6 +4,11 @@
// any use, without warranty, 2021 Yutetsu TAKATSUKASA.
// SPDX-License-Identifier: CC0-1.0
// This function always returns 0, so safe to take bitwise OR with any value.
// Calling this function stops constant folding as Verialtor does not know
// what this function returns.
import "DPI-C" context function int fake_dependency();
module t(/*AUTOARG*/
// Inputs
clk
@ -57,7 +62,8 @@ module t(/*AUTOARG*/
$write("[%0t] cyc==%0d crc=%x sum=%x\n", $time, cyc, crc, sum);
if (crc !== 64'hc77bb9b3784ea091) $stop;
// What checksum will we end up with (above print should match)
`define EXPECTED_SUM 64'hcae926ece668f35d
`define EXPECTED_SUM 64'hdccb9e7b8b638233
if (sum !== `EXPECTED_SUM) $stop;
$write("*-* All Finished *-*\n");
$finish;
@ -79,10 +85,11 @@ module Test(/*AUTOARG*/
logic d0, d1, d2, d3, d4, d5, d6, d7;
logic bug3182_out;
logic bug3197_out;
logic bug3445_out;
output logic o;
logic [6:0] tmp;
logic [7:0] tmp;
assign o = ^tmp;
always_ff @(posedge clk) begin
@ -105,10 +112,12 @@ module Test(/*AUTOARG*/
tmp[4] <= i[0] & (i[1] & (i[2] & (i[3] | d[4]))); // ConstBitOpTreeVisitor::m_frozenNodes
tmp[5] <= bug3182_out;
tmp[6] <= bug3197_out;
tmp[7] <= bug3445_out;
end
bug3182 i_bug3182(.in(d[4:0]), .out(bug3182_out));
bug3197 i_bug3197(.clk(clk), .in(d), .out(bug3197_out));
bug3445 i_bug3445(.clk(clk), .in(d), .out(bug3445_out));
endmodule
@ -116,11 +125,6 @@ module bug3182(in, out);
input wire [4:0] in;
output wire out;
// This function always returns 0, so safe to take bitwise OR with any value.
// Calling this function stops constant folding as Verialtor does not know
// what this function returns.
import "DPI-C" context function int fake_dependency();
logic [4:0] bit_source;
/* verilator lint_off WIDTH */
@ -140,3 +144,62 @@ module bug3197(input wire clk, input wire [31:0] in, output out);
wire tmp0 = (|d[38:0]);
assign out = (d[39] | tmp0);
endmodule
// Bug #3445
// An unoptimized node is kept as frozen node, but its LSB and polarity were not saved.
// AST of RHS of result0 looks as below:
// AND(SHIFTR(AND(WORDSEL(ARRAYSEL(VARREF)), WORDSEL(ARRAYSEL(VARREF)))), 32'd11)
// ~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~
// Two of WORDSELs are frozen nodes. They are under SHIFTR of 11 bits.
//
// Fixing #3445 needs to
// 1. Take AstShiftR and AstNot into op count when diciding optimizable or not
// (result0 and result2 in the test)
// 2. Insert AstShiftR if LSB of the frozen node is not 0 (result1 in the test)
// 3. Insert AstNot if polarity of the frozen node is false (resutl3 in the
// test)
// Regression test for Verilator issue #3445: exercises frozen-node handling in
// the bit-op-tree optimizer.  The exact expression shapes below are deliberate;
// do not "simplify" them, or the test no longer covers the bug.
module bug3445(input wire clk, input wire [31:0] in, output wire out);
// 128-bit shift register fed by the 32-bit input each cycle; supplies data the
// constant folder cannot evaluate at compile time.
logic [127:0] d;
always_ff @(posedge clk)
d <= {d[95:0], in};
// Packed struct totalling 128 bits (1+3+3+2+8+32+4+32+1+42), matching the
// width of d so st[0] <= d below is a plain 128-bit assignment.  Members g and
// h straddle word boundaries, so the st[n].g[0]/st[n].h[0] selects become the
// WORDSEL(ARRAYSEL(...)) nodes described in the header comment above.
typedef struct packed {
logic a;
logic [ 2:0] b;
logic [ 2:0] c;
logic [ 1:0] d;
logic [ 7:0] e;
logic [31:0] f;
logic [ 3:0] g;
logic [31:0] h;
logic i;
logic [41:0] j;
} packed_struct;
// Four-deep pipeline of struct values; each result below taps a different stage.
packed_struct st[4];
// This is always 1'b0, but Verilator cannot notice it.
// This signal helps to reveal wrong optimization of result2 and result3.
logic zero;
always_ff @(posedge clk) begin
st[0] <= d;
st[1] <= st[0];
st[2] <= st[1];
st[3] <= st[2];
// fake_dependency() is a DPI import (declared elsewhere in this file) that
// always returns 0, so zero stays 1'b0 while defeating constant folding.
zero <= fake_dependency() > 0;
end
logic result0, result1, result2, result3;
always_ff @(posedge clk) begin
// Cannot optimize further.
result0 <= (st[0].g[0] & st[0].h[0]) & (in[0] == 1'b0);
// There are redundant !in[0] terms. They should be simplified.
result1 <= (!in[0] & (st[1].g[0] & st[1].h[0])) & ((in[0] == 1'b0) & !in[0]);
// Cannot optimize further.
result2 <= !(st[2].g[0] & st[2].h[0]) & (zero == 1'b0);
// There are redundant zero terms. They should be simplified.
result3 <= (!zero & !(st[3].g[0] & st[3].h[0])) & ((zero == 1'b0) & !zero);
end
// Combine all results into the single output so none of them is discarded as
// unused and each optimization path is actually checked.
assign out = result0 ^ result1 ^ (result2 | result3);
endmodule

View File

@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
scenarios(simulator => 1);
compile(
verilator_flags2 => ['--Ox'],
verilator_flags2 => ['--fno-expand'],
);
execute(

View File

@ -14,7 +14,7 @@ top_filename("t/t_extract_static_const.v");
golden_filename("t/t_extract_static_const.out");
compile(
verilator_flags2 => ["--stats", "--no-merge-const-pool"],
verilator_flags2 => ["--stats", "--fno-merge-const-pool"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_func_twocall.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -16,7 +16,7 @@ scenarios(simulator => 1);
$Self->{sim_time} = 11000;
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -11,7 +11,7 @@ if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); di
scenarios(simulator => 1);
compile(
verilator_flags2 => ["--Os -x-assign 0"],
verilator_flags2 => ["--fno-split -x-assign 0"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_inst_slice.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface1_modport.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface1.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface2.v");
compile(
verilator_flags2 => ["--top-module t -Oi"],
verilator_flags2 => ["--top-module t -fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_array2.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_array.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_down.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen10.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen11.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen12.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen2.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen3.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen4.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen5.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen6.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen7.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen8.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen9.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_gen.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -14,7 +14,7 @@ top_filename("t/t_interface.v");
compile(
# Avoid inlining so we find bugs in the non-inliner connection code
verilator_flags2 => ["-Oi"],
verilator_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_modport_import.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -14,7 +14,7 @@ top_filename("t/t_interface_modport.v");
compile(
# Avoid inlining so we find bugs in the non-inliner connection code
verilator_flags2 => ["-Oi"],
verilator_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_modport.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_mp_func.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_nest.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(simulator => 1);
top_filename("t/t_interface_twod.v");
compile(
v_flags2 => ["-Oi"],
v_flags2 => ["-fno-inline"],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(linter => 1);
top_filename("t/t_lint_setout_bad.v");
lint(
verilator_flags2 => ["--lint-only -Oi"],
verilator_flags2 => ["--lint-only -fno-inline"],
fails => 1,
expect_filename => $Self->{golden_filename},
);

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_cond_huge.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_div.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_eq.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_red.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_shift.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_signed.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -13,7 +13,7 @@ scenarios(vlt => 1);
top_filename("t/t_math_vliw.v");
compile(
verilator_flags2 => ['-Ox'],
verilator_flags2 => ['-fno-expand'],
);
execute(

View File

@ -12,7 +12,7 @@ scenarios(simulator => 1);
compile(
# Disable inlining, this test is trivial without it
verilator_flags2 => ["-Oi --trace"],
verilator_flags2 => ["-fno-inline --trace"],
verilator_flags3 => [],
);

Some files were not shown because too many files have changed in this diff Show More