mirror of https://github.com/Jittor/Jittor
add meminfo and grad warning
This commit is contained in:
parent
c657491a51
commit
1c19d5837d
|
@ -122,7 +122,7 @@ vector<VarPtr> grad(Var* loss, vector<Var*> targets) {
|
|||
if (var->tflag == nt)
|
||||
grad = move(grads[var->custom_data]);
|
||||
if (!grad) {
|
||||
LOGvvv << var << "grads[">>i>>"] set to zero";
|
||||
LOGw << "grads[">>i>>"] doesn't have gradient. It will be set to zero:" << var;
|
||||
grad = make_number(0.f, var);
|
||||
assign_attrs(grad.ptr, var);
|
||||
registe_node_trace_grad(grad.ptr, var, 0);
|
||||
|
|
|
@ -4,12 +4,6 @@
|
|||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <typeinfo>
|
||||
#include <iomanip>
|
||||
#include <sys/sysinfo.h>
|
||||
|
||||
#include "var.h"
|
||||
#include "op.h"
|
||||
#include "var_holder.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
|
||||
#include "mem/allocator/aligned_allocator.h"
|
||||
|
@ -92,67 +86,4 @@ void gc_all() {
|
|||
for (auto& kv : allocators) kv.second->gc();
|
||||
}
|
||||
|
||||
// A number to be printed with a unit prefix chosen from `scale`.
//   value:  raw quantity to print.
//   scale:  one prefix character per power of `base`, smallest first
//           (e.g. " KMG"); the last character is used for anything larger.
//   base:   divisor between two adjacent prefixes (e.g. 1024 or 1000).
//   suffix: unit text appended after the prefix (e.g. "B").
//   p:      stream precision used while printing the number.
struct FloatOutput {
    double value;
    std::string scale;
    int base;
    std::string suffix;
    int p=4;
};

// Print `o` as "<number><prefix><suffix>". The number is right-aligned so
// that number + prefix + suffix fill 7 columns whenever they fit.
// The stream's precision is restored before returning.
std::ostream& operator<<(std::ostream& os, const FloatOutput& o) {
    const int total_width = 8;
    // Signed arithmetic on purpose: with size_t a suffix longer than six
    // characters would wrap around to a huge width.
    const int field_width =
        total_width - 2 - static_cast<int>(o.suffix.size());

    // Divide by `base` until the value fits or we run out of prefixes.
    double scaled = o.value;
    std::size_t level = 0;
    for (; level+1 < o.scale.size(); level++) {
        if (scaled < o.base) break;
        scaled /= o.base;
    }

    // Precision is sticky on streams; set it only for this number.
    const std::streamsize saved_precision = os.precision(o.p);
    os << std::setw(field_width > 0 ? field_width : 0) << scaled;
    os.precision(saved_precision);

    // An empty scale has no prefix to print (indexing it would emit '\0').
    if (!o.scale.empty()) os << o.scale[level];
    return os << o.suffix;
}
|
||||
|
||||
void display_memory_info(const char* fileline) {
|
||||
int p = 2;
|
||||
Log log(fileline, 'i', 0);
|
||||
log << "\n=== display_memory_info ===\n";
|
||||
log << "hold_vars:" << VarHolder::hold_vars.size()
|
||||
<< "lived_vars:" << Var::number_of_lived_vars
|
||||
<< "lived_ops:" << Op::number_of_lived_ops >> '\n';
|
||||
if (use_stat_allocator) {
|
||||
log << "stat:" << use_stat_allocator;
|
||||
log << "total alloc:" << FloatOutput{(double)(stat_allocator_total_alloc_byte
|
||||
- stat_allocator_total_free_byte), " KMG", 1024, "B"};
|
||||
log << "total alloc call:" << FloatOutput{(double)(stat_allocator_total_alloc_call
|
||||
- stat_allocator_total_free_call), " KMG", 1000, ""} >> '\n';
|
||||
}
|
||||
for (auto& a : SFRLAllocator::sfrl_allocators) {
|
||||
auto total = a->used_memory + a->unused_memory;
|
||||
log << "name:" << a->name() << "is_cuda:" << a->is_cuda()
|
||||
<< "used:" << FloatOutput{(double)a->used_memory, " KMG", 1024, "B"}
|
||||
>> "(" >> std::setprecision(p) >> a->used_memory*100.0 / total >> "%)"
|
||||
<< "unused:" << FloatOutput{(double)a->unused_memory, " KMG", 1024, "B"}
|
||||
>> "(" >> std::setprecision(p) >> a->unused_memory*100.0 / total >> "%)"
|
||||
<< "total:" << FloatOutput{(double)total, " KMG", 1024, "B"} >> "\n";
|
||||
}
|
||||
log >> "===========================\n";
|
||||
log.end();
|
||||
}
|
||||
|
||||
// Fill total_cpu_ram from sysinfo() and, when built with HAS_CUDA,
// total_cuda_ram from the properties of CUDA device 0.
// A value stays 0 when the corresponding query fails.
MemInfo::MemInfo() {
    total_cpu_ram = 0;
    total_cuda_ram = 0;
    struct sysinfo info = {0};
    // sysinfo() reports memory sizes as multiples of mem_unit bytes, so the
    // raw totalram field is only a byte count when mem_unit happens to be 1.
    if (sysinfo(&info) == 0)
        total_cpu_ram = static_cast<int64>(info.totalram) * info.mem_unit;
    #ifdef HAS_CUDA
    cudaDeviceProp prop = {0};
    // Only trust prop when the query succeeded (e.g. a device is present).
    if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
        total_cuda_ram = prop.totalGlobalMem;
    #endif
}
|
||||
|
||||
MemInfo mem_info;
|
||||
|
||||
} // jittor
|
|
@ -5,6 +5,7 @@
|
|||
// ***************************************************************
|
||||
#pragma once
|
||||
#include "common.h"
|
||||
#include "mem/mem_info.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
@ -51,24 +52,4 @@ Allocator* get_allocator();
|
|||
// @pyjt(gc)
|
||||
void gc_all();
|
||||
|
||||
// Log a memory usage report (held vars, live vars/ops, allocator usage).
// fileline is forwarded to the Log constructor and defaults to "".
// @pyjt(display_memory_info)
|
||||
void display_memory_info(const char* fileline="");
|
||||
|
||||
// Total memory sizes of the machine, filled in by the default constructor.
// @pyjt(MemInfo)
|
||||
struct MemInfo {
|
||||
// Set by the constructor from the totalram field returned by sysinfo().
// @pyjt(total_cpu_ram)
|
||||
int64 total_cpu_ram;
|
||||
// Set by the constructor from totalGlobalMem of CUDA device 0 when built
// with HAS_CUDA; 0 otherwise.
// @pyjt(total_cuda_ram)
|
||||
int64 total_cuda_ram;
|
||||
|
||||
// Copying is member-wise.
inline MemInfo(const MemInfo&) = default;
|
||||
|
||||
// Defined out of line; performs the sysinfo / CUDA queries.
MemInfo();
|
||||
};
|
||||
|
||||
extern MemInfo mem_info;
|
||||
|
||||
// Return a copy of the global mem_info object.
// @pyjt(get_mem_info)
|
||||
inline MemInfo get_mem_info() { return mem_info; }
|
||||
|
||||
} // jittor
|
|
@ -0,0 +1,110 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
#include <sys/sysinfo.h>
|
||||
|
||||
#include "var.h"
|
||||
#include "op.h"
|
||||
#include "var_holder.h"
|
||||
#include "graph.h"
|
||||
#include "misc/cuda_flags.h"
|
||||
#include "mem/allocator/sfrl_allocator.h"
|
||||
#include "mem/allocator/stat_allocator.h"
|
||||
#include "mem/mem_info.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
// A number to be printed with a unit prefix chosen from `scale`.
//   value:  raw quantity to print.
//   scale:  one prefix character per power of `base`, smallest first
//           (e.g. " KMG"); the last character is used for anything larger.
//   base:   divisor between two adjacent prefixes (e.g. 1024 or 1000).
//   suffix: unit text appended after the prefix (e.g. "B").
//   p:      stream precision used while printing the number.
struct FloatOutput {
    double value;
    std::string scale;
    int base;
    std::string suffix;
    int p=4;
};

// Print `o` as "<number><prefix><suffix>". The number is right-aligned so
// that number + prefix + suffix fill 7 columns whenever they fit.
// The stream's precision is restored before returning.
std::ostream& operator<<(std::ostream& os, const FloatOutput& o) {
    const int total_width = 8;
    // Signed arithmetic on purpose: with size_t a suffix longer than six
    // characters would wrap around to a huge width.
    const int field_width =
        total_width - 2 - static_cast<int>(o.suffix.size());

    // Divide by `base` until the value fits or we run out of prefixes.
    double scaled = o.value;
    std::size_t level = 0;
    for (; level+1 < o.scale.size(); level++) {
        if (scaled < o.base) break;
        scaled /= o.base;
    }

    // Precision is sticky on streams; set it only for this number.
    const std::streamsize saved_precision = os.precision(o.p);
    os << std::setw(field_width > 0 ? field_width : 0) << scaled;
    os.precision(saved_precision);

    // An empty scale has no prefix to print (indexing it would emit '\0').
    if (!o.scale.empty()) os << o.scale[level];
    return os << o.suffix;
}
|
||||
|
||||
void display_memory_info(const char* fileline) {
|
||||
int p = 3;
|
||||
Log log(fileline, 'i', 0);
|
||||
log << "\n=== display_memory_info ===\n";
|
||||
log << "total_cpu_ram:" <<
|
||||
FloatOutput{(double)mem_info.total_cpu_ram, " KMG", 1024, "B"};
|
||||
log << "total_cuda_ram:" <<
|
||||
FloatOutput{(double)mem_info.total_cuda_ram, " KMG", 1024, "B"} >> "\n";
|
||||
log << "hold_vars:" << VarHolder::hold_vars.size()
|
||||
<< "lived_vars:" << Var::number_of_lived_vars
|
||||
<< "lived_ops:" << Op::number_of_lived_ops >> '\n';
|
||||
|
||||
#ifdef NODE_MEMCHECK
|
||||
// get the oldest var
|
||||
vector<Node*> queue;
|
||||
auto t = ++Node::tflag_count;
|
||||
for (auto& vh : VarHolder::hold_vars)
|
||||
if (vh->var->tflag != t) {
|
||||
vh->var->tflag = t;
|
||||
queue.push_back(vh->var);
|
||||
}
|
||||
bfs_both(queue, [](Node*){return true;});
|
||||
vector<pair<int64, Node*>> nodes;
|
||||
nodes.reserve(queue.size());
|
||||
for (auto* node : queue)
|
||||
nodes.push_back({node->__id(), node});
|
||||
std::sort(nodes.begin(), nodes.end());
|
||||
log << "list of the oldest nodes:\n";
|
||||
for (int i=0; i<10 && i<nodes.size(); i++) {
|
||||
log << "ID#" >> nodes[i].first >> ":" << nodes[i].second << "\n";
|
||||
}
|
||||
#endif
|
||||
|
||||
if (use_stat_allocator) {
|
||||
log << "stat:" << use_stat_allocator;
|
||||
log << "total alloc:" << FloatOutput{(double)(stat_allocator_total_alloc_byte
|
||||
- stat_allocator_total_free_byte), " KMG", 1024, "B"};
|
||||
log << "total alloc call:" << FloatOutput{(double)(stat_allocator_total_alloc_call
|
||||
- stat_allocator_total_free_call), " KMG", 1000, ""} >> '\n';
|
||||
}
|
||||
for (auto& a : SFRLAllocator::sfrl_allocators) {
|
||||
auto total = a->used_memory + a->unused_memory;
|
||||
log << "name:" << a->name() << "is_cuda:" << a->is_cuda()
|
||||
<< "used:" << FloatOutput{(double)a->used_memory, " KMG", 1024, "B"}
|
||||
>> "(" >> std::setprecision(p) >> a->used_memory*100.0 / total >> "%)"
|
||||
<< "unused:" << FloatOutput{(double)a->unused_memory, " KMG", 1024, "B"}
|
||||
>> "(" >> std::setprecision(p) >> a->unused_memory*100.0 / total >> "%)"
|
||||
<< "total:" << FloatOutput{(double)total, " KMG", 1024, "B"} >> "\n";
|
||||
}
|
||||
log >> "===========================\n";
|
||||
log.end();
|
||||
}
|
||||
|
||||
// Fill total_cpu_ram from sysinfo() and, when built with HAS_CUDA,
// total_cuda_ram from the properties of CUDA device 0.
// A value stays 0 when the corresponding query fails.
MemInfo::MemInfo() {
    total_cpu_ram = 0;
    total_cuda_ram = 0;
    struct sysinfo info = {0};
    // sysinfo() reports memory sizes as multiples of mem_unit bytes, so the
    // raw totalram field is only a byte count when mem_unit happens to be 1.
    if (sysinfo(&info) == 0)
        total_cpu_ram = static_cast<int64>(info.totalram) * info.mem_unit;
    #ifdef HAS_CUDA
    cudaDeviceProp prop = {0};
    // Only trust prop when the query succeeded (e.g. a device is present).
    if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
        total_cuda_ram = prop.totalGlobalMem;
    #endif
}
|
||||
|
||||
MemInfo mem_info;
|
||||
|
||||
} // jittor
|
|
@ -0,0 +1,31 @@
|
|||
// ***************************************************************
|
||||
// Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
|
||||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#pragma once
|
||||
#include "common.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
// Log a memory usage report (total RAM, held vars, live vars/ops,
// allocator usage).
// fileline is forwarded to the Log constructor and defaults to "".
// @pyjt(display_memory_info)
|
||||
void display_memory_info(const char* fileline="");
|
||||
|
||||
// Total memory sizes of the machine, filled in by the default constructor.
// @pyjt(MemInfo)
|
||||
struct MemInfo {
|
||||
// Set by the constructor from the totalram field returned by sysinfo().
// @pyjt(total_cpu_ram)
|
||||
int64 total_cpu_ram;
|
||||
// Set by the constructor from totalGlobalMem of CUDA device 0 when built
// with HAS_CUDA; 0 otherwise.
// @pyjt(total_cuda_ram)
|
||||
int64 total_cuda_ram;
|
||||

||||
// Copying is member-wise.
inline MemInfo(const MemInfo&) = default;
|
||||

||||
// Defined out of line; performs the sysinfo / CUDA queries.
MemInfo();
|
||||
};
|
||||
|
||||
extern MemInfo mem_info;
|
||||
|
||||
// Return a copy of the global mem_info object.
// @pyjt(get_mem_info)
|
||||
inline MemInfo get_mem_info() { return mem_info; }
|
||||
|
||||
} // jittor
|
|
@ -15,23 +15,33 @@ using namespace pybind11::literals;
|
|||
|
||||
namespace jittor {
|
||||
|
||||
DEFINE_FLAG(int, trace_py_var, 0, "Trace py stack for debug.");
|
||||
DEFINE_FLAG(int, trace_py_var, 0, "Trace py stack max depth for debug.");
|
||||
|
||||
unordered_map<const Node*, string> trace_data;
|
||||
|
||||
// Store a textual description of the current Python call stack for `node`
// in trace_data.
// NOTE(review): this span appears to interleave the removed and the added
// lines of the commit. The `py_stack` / `basename` statements look like the
// pre-change single-frame version; the `py_stacks` / `info` loop looks like
// the post-change version that takes up to trace_py_var frames. Confirm
// against the real file before relying on either reading.
void __registe_node_trace(Node* node) {
|
||||
auto py_stack =
|
||||
auto py_stacks =
|
||||
py::module::import("traceback")
|
||||
// Single-frame form: extract one frame and take element 0.
.attr("extract_stack")(nullptr, 1).attr("__getitem__")(0);
|
||||
auto filename = py_stack.attr("filename").cast<string>();
|
||||
// Keep only the last path component of the file name.
auto basename = split(filename, "/").back();
|
||||
basename += ':';
|
||||
basename += py_stack.attr("name").cast<string>();
|
||||
basename += ':';
|
||||
basename += S(py_stack.attr("lineno").cast<int>());
|
||||
basename += ':';
|
||||
basename += py_stack.attr("line").cast<string>();
|
||||
trace_data[node] = basename;
|
||||
// Multi-frame form: the frame limit comes from the trace_py_var flag.
.attr("extract_stack")(nullptr, trace_py_var);
|
||||
auto len = py_stacks.attr("__len__")().cast<int>();
|
||||
string info;
|
||||
for (int i=0; i<len; i++) {
|
||||
auto py_stack = py_stacks.attr("__getitem__")(i);
|
||||
auto filename = py_stack.attr("filename").cast<string>();
|
||||
// A single frame is written compactly (last path component only);
// several frames each start on a new line with the full file name.
if (len==1)
|
||||
info += split(filename, "/").back();
|
||||
else {
|
||||
info += "\n ";
|
||||
info += filename;
|
||||
}
|
||||
// Each frame is appended as <file>:<name>:<lineno>:<line>.
info += ':';
|
||||
info += py_stack.attr("name").cast<string>();
|
||||
info += ':';
|
||||
info += S(py_stack.attr("lineno").cast<int>());
|
||||
info += ':';
|
||||
info += py_stack.attr("line").cast<string>();
|
||||
}
|
||||
trace_data[node] = info;
|
||||
}
|
||||
|
||||
void __unregiste_node_trace(Node* node) {
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
// This file is subject to the terms and conditions defined in
|
||||
// file 'LICENSE.txt', which is part of this source code package.
|
||||
// ***************************************************************
|
||||
#include <sstream>
|
||||
#ifdef HAS_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <helper_cuda.h>
|
||||
|
@ -122,4 +123,10 @@ vector<ArrayArgs> fetch_sync(const vector<VarHolder*>& vh) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
// Stream the held `var` into a string buffer and return the resulting text.
string VarHolder::debug_msg() {
    std::ostringstream buffer;
    buffer << var;
    return buffer.str();
}
|
||||
|
||||
} // jittor
|
|
@ -154,6 +154,9 @@ struct VarHolder {
|
|||
#endif
|
||||
std::memcpy(var->mem_ptr, array.ptr, size);
|
||||
}
|
||||
|
||||
// @pyjt(debug_msg)
|
||||
string debug_msg();
|
||||
};
|
||||
|
||||
// @pyjt(sync)
|
||||
|
|
Loading…
Reference in New Issue