Improve `--skip-identical` to skip on identical input file contents (#6109).

This commit is contained in:
Wilson Snyder 2025-07-26 02:43:57 -04:00
parent 20bd80698d
commit d289934ca9
6 changed files with 110 additions and 46 deletions

View File

@ -24,6 +24,7 @@ Verilator 5.039 devel
* Support multiple variables on RHS of a `force` assignment (#6163). [Artur Bieniek, Antmicro Ltd.]
* Support covergroup extends, etc., as unsupported (#6160). [Artur Bieniek, Antmicro Ltd.]
* Change control file `public_flat_*` and other signal attributes to support __ in names (#6140).
* Improve `--skip-identical` to skip on identical input file contents (#6109).
* Optimize to return memory when using -build (#6192) (#6226). [Michael B. Taylor]
* Optimize 2 ** X to 1 << X if base is signed (#6203). [Max Wipfli]
* Optimize more complex combinational assignments in DFG (#6205) (#6209). [Geza Lore]

View File

@ -60,6 +60,8 @@ VL_DEFINE_DEBUG_FUNCTIONS;
constexpr int INFILTER_IPC_BUFSIZ = (64 * 1024); // For debug, try this as a small number
constexpr int INFILTER_CACHE_MAX = (64 * 1024); // Maximum bytes to cache if same file read twice
constexpr off_t FILE_HASH_SIZE_MAX = 1 * 1024 * 1024; // Maxium size file to hash
//######################################################################
// V3File Internal state
@ -71,6 +73,7 @@ class V3FileDependImp final {
bool m_exists = true;
const string m_filename; // Filename
struct stat m_stat; // Stat information
VHashSha256 m_hash; // SHA hash of file contents
public:
DependFile(const string& filename, bool target)
: m_target{target}
@ -88,6 +91,11 @@ class V3FileDependImp final {
time_t cnstime() const { return VL_STAT_CTIME_NSEC(m_stat); } // Nanoseconds
time_t mstime() const { return m_stat.st_mtime; } // Seconds
time_t mnstime() const { return VL_STAT_MTIME_NSEC(m_stat); } // Nanoseconds
string hashDigestSymbol() {
static VHashSha256 emptyHash;
return m_hash.digestSymbol() != emptyHash.digestSymbol() ? m_hash.digestSymbol()
: "unhashed";
}
void loadStats() {
if (!m_stat.st_mtime) {
const string fn = filename();
@ -98,6 +106,9 @@ class V3FileDependImp final {
m_exists = false;
// Not an error... This can occur due to `line directives in the .vpp files
UINFO(1, "-Info: File not statable: " << filename());
} else {
// For speed, don't hash large files (e.g. verilator_bin)
if (!target() && size() <= FILE_HASH_SIZE_MAX) m_hash.insertFile(filename());
}
}
}
@ -186,28 +197,28 @@ void V3FileDependImp::writeTimes(const string& filename, const string& cmdlineIn
<< "IPTION: Verilator output: Timestamp data for --skip-identical. Delete at will.\n";
*ofp << "C \"" << cmdline << "\"\n";
for (std::set<DependFile>::iterator iter = m_filenameList.begin();
iter != m_filenameList.end(); ++iter) {
for (auto& it : m_filenameList) {
DependFile* const depfilep = const_cast<DependFile*>(&it);
// Read stats of files we create after we're done making them
// (except for this file, of course)
DependFile* const dfp = const_cast<DependFile*>(&(*iter));
V3Os::filesystemFlush(dfp->filename());
dfp->loadStats();
off_t showSize = iter->size();
ino_t showIno = iter->ino();
if (dfp->filename() == filename) {
V3Os::filesystemFlush(depfilep->filename());
depfilep->loadStats();
off_t showSize = depfilep->size();
ino_t showIno = depfilep->ino();
if (depfilep->filename() == filename) {
showSize = 0;
showIno = 0; // We're writing it, so need to ignore it
}
*ofp << (iter->target() ? "T" : "S") << " ";
*ofp << (depfilep->target() ? "T" : "S") << " ";
*ofp << " " << std::setw(8) << showSize;
*ofp << " " << std::setw(8) << showIno;
*ofp << " " << std::setw(11) << iter->cstime();
*ofp << " " << std::setw(11) << iter->cnstime();
*ofp << " " << std::setw(11) << iter->mstime();
*ofp << " " << std::setw(11) << iter->mnstime();
*ofp << " \"" << iter->filename() << "\"";
*ofp << " " << std::setw(11) << depfilep->cstime();
*ofp << " " << std::setw(11) << depfilep->cnstime();
*ofp << " " << std::setw(11) << depfilep->mstime();
*ofp << " " << std::setw(11) << depfilep->mnstime();
*ofp << " \"" << depfilep->hashDigestSymbol() << "\"";
*ofp << " \"" << depfilep->filename() << "\"";
*ofp << '\n';
}
}
@ -236,6 +247,8 @@ bool V3FileDependImp::checkTimes(const string& filename, const string& cmdlineIn
}
}
const bool skipHashing = !V3Os::getenvStr("VERILATOR_DEBUG_SKIP_HASH", "").empty();
while (!ifp->eof()) {
char chkDir;
*ifp >> chkDir;
@ -254,39 +267,55 @@ bool V3FileDependImp::checkTimes(const string& filename, const string& cmdlineIn
*ifp >> chkMnstime;
char quote;
*ifp >> quote;
const string chkHash = V3Os::getline(*ifp, '"');
*ifp >> quote;
const string chkFilename = V3Os::getline(*ifp, '"');
V3Os::filesystemFlush(chkFilename);
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
struct stat chkStat;
const int err = stat(chkFilename.c_str(), &chkStat);
struct stat curStat;
const int err = stat(chkFilename.c_str(), &curStat);
if (err != 0) {
UINFO(2, " --check-times failed: missing " << chkFilename);
return false;
}
// UINFO(9," got d="<<chkDir<<" s="<<chkSize<<" ct="<<chkCstime<<"."
// <<chkCnstime<<" mt="<<chkMstime<<"."<<chkMnstime<<" fn = "<<chkFilename<<endl);
// UINFO(9," nowSt s="<<chkStat.st_size<<" mt="<<chkStat.st_mtime<<"
// ct="<<chkStat.st_ctime<<" fn = "<<chkFilename<<endl);
if (filename != chkFilename) { // Other then the .dat file itself, as we were writing it
// at the time...
// We'd like this rule:
// if (!(chkStat.st_size == chkSize
// && chkStat.st_mtime == chkMstime) {
// However NFS messes us up, as there might be some data outstanding when
// we determined the original size. For safety, we know the creation time
// must be within a few second window... call it 20 sec.
if (!(chkStat.st_size >= chkSize && chkStat.st_ino == chkIno
&& chkStat.st_ctime == chkCstime && VL_STAT_CTIME_NSEC(chkStat) == chkCnstime
&& chkStat.st_mtime <= (chkMstime + 20)
// Not comparing chkMnstime
)) {
UINFO(2, " --check-times failed: out-of-date "
<< chkFilename << "; " << chkStat.st_size << "=?" << chkSize << " "
<< chkStat.st_ctime << "." << VL_STAT_CTIME_NSEC(chkStat) << "=?"
<< chkCstime << "." << chkCnstime << " " << chkStat.st_mtime << "."
<< VL_STAT_MTIME_NSEC(chkStat) << "=?" << chkMstime << "."
<< chkMnstime << endl);
// Skip the .dat file itself, as we were writing it at the time was being made
if (filename == chkFilename) continue;
UINFO(9, " got d=" << chkDir << " s=" << chkSize << " ct=" << chkCstime << "."
<< chkCnstime << " mt=" << chkMstime << "." << chkMnstime
<< " h=" << chkHash << " fn= " << chkFilename);
UINFO(9, " nowSt s=" << curStat.st_size << " mt=" << curStat.st_mtime
<< " ct=" << curStat.st_ctime);
// We'd like this rule:
// if (!(curStat.st_size == chkSize
// && curStat.st_mtime == chkMstime) {
// However NFS messes us up, as there might be some data outstanding when
// we determined the original size. For safety, we know the creation time
// must be within a few second window... call it 20 sec.
if (!(curStat.st_size >= chkSize && curStat.st_ino == chkIno
&& curStat.st_ctime == chkCstime && VL_STAT_CTIME_NSEC(curStat) == chkCnstime
&& curStat.st_mtime <= (chkMstime + 20)
// Not comparing chkMnstime
)) {
UINFO(2, " --check-times: out-of-date "
<< chkFilename << "; " << curStat.st_size << "=?" << chkSize << " "
<< curStat.st_ctime << "." << VL_STAT_CTIME_NSEC(curStat) << "=?"
<< chkCstime << "." << chkCnstime << " " << curStat.st_mtime << "."
<< VL_STAT_MTIME_NSEC(curStat) << "=?" << chkMstime << "." << chkMnstime
<< endl);
if (skipHashing) return false;
// We didn't hash target output files, nor large files,
// as unlikely to find a match and can be large
if (chkHash == "unhashed") return false;
VHashSha256 curHash;
curHash.insertFile(chkFilename);
if (curHash.digestSymbol() != chkHash) {
UINFO(2, " --check-times: hash differs "
<< chkFilename << "; " << curHash.digestSymbol() << "=?" << chkHash);
return false;
}
}

View File

@ -17,6 +17,10 @@
#include "config_build.h"
#include "verilatedos.h"
#if defined(_WIN32) || defined(__MINGW32__)
# include <io.h> // open, read, write, close
#endif
#include "V3String.h"
#include "V3Error.h"
@ -28,6 +32,7 @@ VL_DEFINE_DEBUG_FUNCTIONS;
#endif
#include <algorithm>
#include <fcntl.h>
size_t VName::s_minLength = 32;
size_t VName::s_maxLength = 0; // Disabled
@ -494,6 +499,20 @@ void VHashSha256::insert(const void* datap, size_t length) {
m_remainder = std::string(reinterpret_cast<const char*>(chunkp + posBegin), chunkLen - posEnd);
}
void VHashSha256::insertFile(const string& filename) {
static const size_t BUFFER_SIZE = 64 * 1024;
const int fd = ::open(filename.c_str(), O_RDONLY);
if (fd < 0) return;
std::array<char, BUFFER_SIZE + 1> buf;
while (const size_t got = ::read(fd, &buf, BUFFER_SIZE)) {
if (got <= 0) break;
insert(&buf, got);
}
::close(fd);
}
void VHashSha256::finalize() {
if (!m_final) {
// Make sure no 64 byte blocks left

View File

@ -190,6 +190,7 @@ public:
insert(data.data(), data.length());
} // Process data into the digest
void insert(uint64_t value) { insert(cvtToStr(value)); }
void insertFile(const string& filename);
private:
static void selfTestOne(const string& data, const string& data2, const string& exp,

View File

@ -21,8 +21,17 @@ def prep_output_file(filename):
FileTimes[filename] = oldstats
def check_times():
for filename, oldtime in FileTimes.items():
newstats = os.path.getmtime(filename)
print("New %s mtime=%d" % (filename, newstats))
if oldtime != newstats:
test.error("--skip-identical was ignored -- regenerated %s" % (filename))
test.scenarios('vlt')
test.setenv('VERILATOR_DEBUG_SKIP_HASH', "1")
test.compile(verilator_flags2=['--stats'])
print("NOTE: use --debugi, as --debug in driver turns off skip-identical")
@ -32,13 +41,19 @@ prep_output_file(test.obj_dir + "/V" + test.name + "__stats.txt")
time.sleep(2) # Or else it might take < 1 second to compile and see no diff.
print("\nTest skip without hash fallback")
test.setenv('VERILATOR_DEBUG_SKIP_IDENTICAL', "1")
test.setenv('VERILATOR_DEBUG_SKIP_HASH', "1")
test.compile(verilator_flags2=['--stats'])
check_times()
for filename, oldtime in FileTimes.items():
newstats = os.path.getmtime(filename)
print("New %s mtime=%d" % (filename, newstats))
if oldtime != newstats:
test.error("--skip-identical was ignored -- regenerated %s" % (filename))
time.sleep(2) # Or else it might take < 1 second to compile and see no diff.
print("\nTest skip with hash fallback")
os.utime(test.top_filename, None)
test.setenv('VERILATOR_DEBUG_SKIP_IDENTICAL', "1")
test.setenv('VERILATOR_DEBUG_SKIP_HASH', "")
test.compile(verilator_flags2=['--stats'])
check_times()
test.passes()

View File

@ -4,6 +4,5 @@
// any use, without warranty, 2006 by Wilson Snyder.
// SPDX-License-Identifier: CC0-1.0
module t (/*AUTOARG*/);
module t;
endmodule