diff --git a/include/verilated.cpp b/include/verilated.cpp index 74ceae09d..33cf917d4 100644 --- a/include/verilated.cpp +++ b/include/verilated.cpp @@ -3061,7 +3061,9 @@ void VerilatedContext::statsPrintSummary() VL_MT_UNSAFE { = vl_timescaled_double((cputime != 0.0) ? (simtimeInUnits / cputime) : 0, "%0.3f %s"); VL_PRINTF("- Verilator: %s at %s; walltime %0.3f s; speed %s/s\n", endwhy.c_str(), simtime.c_str(), walltime, simtimePerf.c_str()); - const double modelMB = VlOs::memPeakUsageBytes() / 1024.0 / 1024.0; + uint64_t memPeak, memCurrent; + VlOs::memUsageBytes(memPeak /*ref*/, memCurrent /*ref*/); + const double modelMB = memPeak / 1024.0 / 1024.0; VL_PRINTF("- Verilator: cpu %0.3f s on %u threads; alloced %0.0f MB\n", cputime, threadsInModels(), modelMB); } diff --git a/include/verilatedos.h b/include/verilatedos.h index 085bb2544..6b289292f 100644 --- a/include/verilatedos.h +++ b/include/verilatedos.h @@ -637,7 +637,7 @@ extern std::string getenvStr(const std::string& envvar, extern uint16_t getcpu() VL_MT_SAFE; /// Return memory usage in bytes, or 0 if unknown -extern uint64_t memPeakUsageBytes() VL_MT_SAFE; +extern void memUsageBytes(uint64_t& peakr, uint64_t& currentr) VL_MT_SAFE; // Internal: Record CPU time, starting point on construction, and current delta from that class DeltaCpuTime final { diff --git a/include/verilatedos_c.h b/include/verilatedos_c.h index 6ecdf7dba..2b4619816 100644 --- a/include/verilatedos_c.h +++ b/include/verilatedos_c.h @@ -25,6 +25,9 @@ #include "verilatedos.h" +#include +#include + // clang-format off #if defined(_WIN32) || defined(__MINGW32__) # include // LONG for bcrypt.h on MINGW @@ -104,28 +107,40 @@ uint16_t getcpu() VL_MT_SAFE { //========================================================================= // VlOs::memPeakUsageBytes implementation -uint64_t memPeakUsageBytes() VL_MT_SAFE { +void memUsageBytes(uint64_t& peakr, uint64_t& currentr) VL_MT_SAFE { + peakr = 0; + currentr = 0; #if defined(_WIN32) || defined(__MINGW32__) const HANDLE process = GetCurrentProcess(); PROCESS_MEMORY_COUNTERS pmc; if (GetProcessMemoryInfo(process, &pmc, sizeof(pmc))) { // The best we can do using simple Windows APIs is to get the size of the working set. - return pmc.WorkingSetSize; + peakr = pmc.PeakWorkingSetSize; + currentr = pmc.WorkingSetSize; } - return 0; #else // Highly unportable. Sorry - const char* const statmFilename = "/proc/self/statm"; - FILE* const fp = fopen(statmFilename, "r"); - if (!fp) return 0; - uint64_t size, resident, share, text, lib, data, dt; // All in pages - const int items = fscanf( - fp, "%" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64 " %" SCNu64, - &size, &resident, &share, &text, &lib, &data, &dt); - fclose(fp); - if (VL_UNCOVERABLE(7 != items)) return 0; - // Return the vm size, not the current active set size (/proc/self/status VmRSS + VmSwap) - return (text + data) * getpagesize(); + std::ifstream is{"/proc/self/status"}; + if (!is) return; + std::string line; + uint64_t vmPeak = 0; + uint64_t vmRss = 0; + uint64_t vmSwap = 0; + std::string field; + while (std::getline(is, line)) { + if (line.rfind("VmPeak:", 0) == 0) { + std::stringstream ss{line}; + ss >> field >> vmPeak; + } else if (line.rfind("VmRSS:", 0) == 0) { + std::stringstream ss{line}; + ss >> field >> vmRss; + } else if (line.rfind("VmSwap:", 0) == 0) { + std::stringstream ss{line}; + ss >> field >> vmSwap; + } + } + peakr = vmPeak * 1024; + currentr = (vmRss + vmSwap) * 1024; #endif } diff --git a/src/V3Stats.cpp b/src/V3Stats.cpp index 394ba1de2..cee37d85a 100644 --- a/src/V3Stats.cpp +++ b/src/V3Stats.cpp @@ -179,7 +179,4 @@ void V3Stats::statsStageAll(AstNetlist* nodep, const std::string& stage, bool fa StatsVisitor{nodep, stage, fastOnly}; } -void V3Stats::statsFinalAll(AstNetlist* nodep) { - statsStageAll(nodep, "Final all"); - statsStageAll(nodep, "Final fast", true); -} +void V3Stats::statsFinalAll(AstNetlist* nodep) { statsStageAll(nodep, "Final"); } diff --git a/src/V3StatsReport.cpp b/src/V3StatsReport.cpp index 2c5dadb80..3efe8e947 100644 --- a/src/V3StatsReport.cpp +++ b/src/V3StatsReport.cpp @@ -215,8 +215,10 @@ void V3Stats::statsStage(const string& name) { V3Stats::addStatPerf("Stage, Elapsed time (sec), " + digitName, wallTimeDelta); V3Stats::addStatPerf("Stage, Elapsed time (sec), TOTAL", wallTimeDelta); - const double memory = VlOs::memPeakUsageBytes() / 1024.0 / 1024.0; - V3Stats::addStatPerf("Stage, Memory (MB), " + digitName, memory); + uint64_t memPeak, memCurrent; + VlOs::memUsageBytes(memPeak /*ref*/, memCurrent /*ref*/); + V3Stats::addStatPerf("Stage, Memory current (MB), " + digitName, memCurrent / 1024.0 / 1024.0); + V3Stats::addStatPerf("Stage, Memory peak (MB), " + digitName, memPeak / 1024.0 / 1024.0); } void V3Stats::infoHeader(std::ofstream& os, const string& prefix) { @@ -266,7 +268,9 @@ void V3Stats::summaryReport() { << ", cvt=" << walltimeCvt << ", bld=" << walltimeBuild << "); cpu " << cputime << " s on " << std::max(v3Global.opt.verilateJobs(), v3Global.opt.buildJobs()) << " threads"; - const double memory = VlOs::memPeakUsageBytes() / 1024.0 / 1024.0; + uint64_t memPeak, memCurrent; + VlOs::memUsageBytes(memPeak /*ref*/, memCurrent /*ref*/); + const double memory = memPeak / 1024.0 / 1024.0; if (VL_UNCOVERABLE(memory != 0.0)) std::cout << "; alloced " << memory << " MB"; std::cout << "\n"; } diff --git a/src/Verilator.cpp b/src/Verilator.cpp index f712412b8..61cc3b9b5 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -768,7 +768,8 @@ static void verilate(const string& argString) { V3Os::filesystemFlushBuildDir(v3Global.opt.makeDir()); if (v3Global.opt.hierTop()) V3Os::filesystemFlushBuildDir(v3Global.opt.hierTopDataDir()); - if (v3Global.opt.stats()) V3Stats::statsStage("wrote"); + if (v3Global.opt.stats()) V3Stats::statsStageAll(v3Global.rootp(), "WroteAll"); + if (v3Global.opt.stats()) V3Stats::statsStageAll(v3Global.rootp(), "WroteFast"); // Final writing shouldn't throw warnings, but... V3Error::abortIfWarnings(); diff --git a/test_regress/t/t_display_merge.py b/test_regress/t/t_display_merge.py index 03f439c26..21d0b3b95 100755 --- a/test_regress/t/t_display_merge.py +++ b/test_regress/t/t_display_merge.py @@ -9,7 +9,7 @@ import vltest_bootstrap -test.scenarios('simulator') +test.scenarios('simulator_st') test.compile(verilator_flags2=["--stats"])