mirror of https://github.com/Jittor/Jittor
memory_profiler
This commit is contained in:
parent
1be344526c
commit
d80e4056f6
|
@ -721,3 +721,90 @@ def triu_(x,diagonal=0):
|
|||
return x.reindex(x.shape,indexs,overflow_conditions=overflow_conditions,overflow_value=0)
|
||||
|
||||
jt.Var.triu_ = triu_
|
||||
|
||||
def print_tree(now, max_memory_size, prefix1='', prefix2=''):
    """Recursively print a memory-usage tree to stdout.

    Args:
        now: Tree node, a dict with the keys 'name', 'type', 'size' (bytes),
            'path' (list of strings printed one per line) and 'children'
            (list of nodes of the same shape).
        max_memory_size: Byte count used as the 100% reference for the
            percentage shown next to every node. A value of 0 is reported
            as 0.00% instead of raising ZeroDivisionError.
        prefix1: Text printed before the node's header line (carries the
            branch connector drawn by the parent).
        prefix2: Text printed before every other line belonging to the node.
    """
    def format_size(s):
        # Pick the largest binary unit that keeps the value >= 1.
        if s < 1024:
            return str(s) + ' B'
        if s < 1024*1024:
            return format(s/1024, '.2f') + ' KB'
        if s < 1024*1024*1024:
            return format(s/1024/1024, '.2f') + ' MB'
        return format(s/1024/1024/1024, '.2f') + ' GB'

    tab = ' '
    # An idle profiler reports a peak of 0 bytes; avoid dividing by it.
    percent = now['size']/max_memory_size*100 if max_memory_size else 0.0

    print(prefix1+now['name']+'('+now['type']+')')
    print(prefix2+'['+format_size(now['size'])+'; '+format(percent, '.2f')+'%]')
    for p in now['path']:
        print(prefix2+p)

    children = now['children']
    if children:
        print(prefix2 + tab + '| ')
    else:
        print(prefix2)

    last = len(children) - 1
    for i, c in enumerate(children):
        if i < last:
            branch, continuation = '├─', '| '
        else:
            # Two columns wide, like the other connectors, so that lines
            # below a last child stay aligned with those below its siblings.
            branch, continuation = '└─', '  '
        print_tree(c, max_memory_size, prefix2 + tab + branch, prefix2 + tab + continuation)
|
||||
|
||||
def get_max_memory_treemap():
    """Build, print and return a call-stack tree of peak memory usage.

    The string returned by jt.get_max_memory_info() is parsed as:

        <peak bytes> { div1 <description> div2 <bytes> div2
                       { <path> div3 <name> div3 <type> div2 } }

    i.e. one div1-separated record per live variable, each carrying its
    size and a list of stack frames. Variables are merged into a tree keyed
    by frame name; every node accumulates the bytes of all variables whose
    stack passes through it. Children are sorted by size, largest first,
    and the tree is printed with print_tree().

    Returns:
        The root node: a dict with the keys 'name', 'children', 'size',
        'path' and 'type'.

    Raises:
        AssertionError: if two frames share a name under the same parent
            but report different types.
    """
    div1 = "[!@#div1!@#]"
    div2 = "[!@#div2!@#]"
    div3 = "[!@#div3!@#]"
    info = jt.get_max_memory_info()

    records = info.split(div1)
    max_memory_size = int(records[0])

    live_vars = []
    for record in records[1:]:
        fields = record.split(div2)
        # fields[0] is the variable description (not needed for the tree),
        # fields[1] its size; the final field is the empty remainder after
        # the trailing div2, hence the [2:-1] slice for the frames.
        var = {'size':int(fields[1]), 'stack':[]}
        for frame in fields[2:-1]:
            parts = frame.split(div3)
            var['stack'].append({'path':parts[0], 'name':parts[1], 'type':parts[2]})
        live_vars.append(var)

    tree = {'name':'root', "children":[], 'size':0, 'path':[], 'type':''}

    def find_child(now, key):
        for c in now['children']:
            if c['name'] == key:
                return c
        return None

    for v in live_vars:
        now = tree
        now['size'] += v['size']
        for s in v['stack']:
            ch = find_child(now, s['name'])
            if ch is None:
                ch = {'name':s['name'], "children":[], 'size':v['size'], 'path':[s['path']], 'type':s['type']}
                now['children'].append(ch)
            else:
                # Same frame name reached from another source location:
                # remember each distinct location once.
                if s['path'] not in ch['path']:
                    ch['path'].append(s['path'])
                assert(ch['type']==s['type'])
                ch['size'] += v['size']
            now = ch

    def sort_tree(now):
        now['children'].sort(key=lambda child: child['size'], reverse=True)
        for c in now['children']:
            sort_tree(c)

    sort_tree(tree)
    # print_tree divides by this value; a profiler that recorded nothing
    # reports a peak of 0 bytes, so clamp the denominator to keep the
    # (then all-zero) percentages printable instead of raising.
    print_tree(tree, max(max_memory_size, 1), '', '')
    return tree
|
|
@ -63,7 +63,7 @@ class TestMemoryProfiler(unittest.TestCase):
|
|||
prev = time.time()
|
||||
SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum, self.weight_decay)
|
||||
|
||||
iters = 50
|
||||
iters = 10
|
||||
for batch_idx, (data, target) in enumerate(self.train_loader):
|
||||
if (batch_idx > iters):
|
||||
break
|
||||
|
@ -82,6 +82,7 @@ class TestMemoryProfiler(unittest.TestCase):
|
|||
jt.fetch(batch_idx, loss, output, target, callback)
|
||||
jt.sync_all(True)
|
||||
jt.display_max_memory_info()
|
||||
jt.get_max_memory_treemap()
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <algorithm>
|
||||
#include <sys/sysinfo.h>
|
||||
#include <sstream>
|
||||
#include "pybind/py_var_tracer.h"
|
||||
|
||||
namespace jittor {
|
||||
|
||||
|
@ -63,7 +64,7 @@ void MemoryProfiler::check() {
|
|||
|
||||
allocations.clear();
|
||||
size_t memory_size = 0;
|
||||
vector<std::pair<string, size_t>> live_vars;
|
||||
std::vector<std::pair<std::pair<string, vector<Stack>>, size_t>> live_vars;
|
||||
vector<Node*> queue;
|
||||
|
||||
auto t = ++Node::tflag_count;
|
||||
|
@ -77,9 +78,10 @@ void MemoryProfiler::check() {
|
|||
if (node->is_var()) {
|
||||
Var* var = (Var*)node;
|
||||
if (var->mem_ptr != nullptr) {
|
||||
vector<Stack> stacks = get_node_trace(var);
|
||||
std::stringstream stream;
|
||||
stream << var;
|
||||
live_vars.push_back(std::make_pair(stream.str(), var->size));
|
||||
live_vars.push_back(std::make_pair(std::make_pair(stream.str(), stacks), var->size));
|
||||
if (!allocations.count(var->mem_ptr)) {
|
||||
allocations[var->mem_ptr] = 1;
|
||||
memory_size += var->size;
|
||||
|
@ -92,7 +94,7 @@ void MemoryProfiler::check() {
|
|||
}
|
||||
}
|
||||
|
||||
bool MemoryProfiler::cmp(const std::pair<string, size_t>& a, const std::pair<string, size_t>& b) {
|
||||
bool MemoryProfiler::cmp(const std::pair<std::pair<string, vector<Stack>>, size_t>& a, const std::pair<std::pair<string, vector<Stack>>, size_t>& b) {
|
||||
return a.second > b.second;
|
||||
}
|
||||
|
||||
|
@ -104,7 +106,11 @@ void MemoryProfiler::display_max_memory_info() {
|
|||
log << "max var memory" << FloatOutput_{(double)max_memory_size, " KMG", 1024, "B"} << "\n\n";
|
||||
log << "[Size]" << "[Percent]" << "[Var Info]" << "\n";
|
||||
for (int i = 0; i < max_live_vars.size(); ++i) {
|
||||
log << FloatOutput_{(double)max_live_vars[i].second, " KMG", 1024, "B"} << double(max_live_vars[i].second) / max_memory_size * 100 << "%" << max_live_vars[i].first << "\n\n";
|
||||
log << FloatOutput_{(double)max_live_vars[i].second, " KMG", 1024, "B"}
|
||||
<< double(max_live_vars[i].second) / max_memory_size * 100 << "%"
|
||||
<< max_live_vars[i].first.first
|
||||
<< max_live_vars[i].first.second[0].file_path + ":" + std::to_string(max_live_vars[i].first.second[0].lineno)
|
||||
<< "\n\n";
|
||||
}
|
||||
log << "=========================\n";
|
||||
log.end();
|
||||
|
@ -114,4 +120,29 @@ void display_max_memory_info() {
|
|||
memory_profiler.display_max_memory_info();
|
||||
}
|
||||
|
||||
// Serializes the recorded peak-memory snapshot into one delimited string:
//
//   <max_memory_size> { div1 <var description> div2 <var size> div2
//                       { <file_path>:<lineno> div3 <module_name> div3 <module_type> div2 } }
//
// One div1-prefixed record is written per entry of max_live_vars, followed
// by one div3-separated triple per stack frame of that entry. Entries are
// sorted by size (largest first) before writing; the sort is applied to
// max_live_vars itself, so the stored order changes as a side effect.
string MemoryProfiler::get_max_memory_info() {
    const string div1 = "[!@#div1!@#]";
    const string div2 = "[!@#div2!@#]";
    const string div3 = "[!@#div3!@#]";

    std::sort(max_live_vars.begin(), max_live_vars.end(), cmp);

    std::stringstream out;
    out << max_memory_size;
    for (const auto& live_var : max_live_vars) {
        const auto& description = live_var.first.first;
        const auto& stacks = live_var.first.second;
        out << div1
            << description << div2
            << live_var.second << div2;
        for (const auto& frame : stacks) {
            out << frame.file_path << ":" << std::to_string(frame.lineno) << div3
                << frame.module_name << div3
                << frame.module_type << div2;
        }
    }
    return out.str();
}
|
||||
|
||||
// Free-function entry point: forwards to the global memory_profiler and
// returns its serialized peak-memory snapshot unchanged.
string get_max_memory_info() {
    return memory_profiler.get_max_memory_info();
}
|
||||
|
||||
} // jittor
|
|
@ -11,25 +11,29 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include "var.h"
|
||||
#include "pybind/py_var_tracer.h"
|
||||
namespace jittor {
|
||||
|
||||
// @pyjt(display_max_memory_info)
|
||||
void display_max_memory_info();
|
||||
// @pyjt(get_max_memory_info)
|
||||
string get_max_memory_info();
|
||||
|
||||
struct MemoryProfiler {
|
||||
std::map<void*, size_t> allocations;
|
||||
// Max Infos
|
||||
std::vector<std::pair<string, size_t>> max_live_vars;
|
||||
vector<std::pair<std::pair<string, vector<Stack>>, size_t>> max_live_vars;
|
||||
size_t max_used_memory_size;
|
||||
size_t max_memory_size;
|
||||
|
||||
|
||||
MemoryProfiler();
|
||||
static bool cmp(const std::pair<string, size_t>& a, const std::pair<string, size_t>& b);
|
||||
static bool cmp(const std::pair<std::pair<string, vector<Stack>>, size_t>& a, const std::pair<std::pair<string, vector<Stack>>, size_t>& b);
|
||||
void clear();
|
||||
void check();
|
||||
std::pair<size_t, size_t> get_memory_info();
|
||||
void display_max_memory_info();
|
||||
string get_max_memory_info();
|
||||
};
|
||||
|
||||
extern MemoryProfiler memory_profiler;
|
||||
|
|
|
@ -97,7 +97,8 @@ static vector<Stack> get_stack_info() {
|
|||
PyObject* prev_obj = nullptr;
|
||||
if (trace_py_var >= 3) {
|
||||
// trace raw stack
|
||||
auto start = std::max(0, n-5);
|
||||
// auto start = std::max(0, n-5);
|
||||
auto start = 0;
|
||||
for (int i=start; i<n; i++) {
|
||||
auto f = frames[i];
|
||||
auto filename = to_string(f->f_code->co_filename);
|
||||
|
@ -185,7 +186,7 @@ void TraceData::record_node(Node* node, bool record_stack) {
|
|||
NodeData data;
|
||||
data.id = node_data_cnt++;
|
||||
id_map[node] = data.id;
|
||||
if (!node->is_var() || trace_py_var>=3) {
|
||||
if (trace_py_var) {
|
||||
if (record_stack) {
|
||||
if (trace_grad_op) {
|
||||
auto iter = trace_data.id_map.find(trace_grad_op);
|
||||
|
@ -363,4 +364,16 @@ void print_node_trace(const Node* node, std::ostream& os) {
|
|||
os << _get_stack_info((Node*)node);
|
||||
}
|
||||
|
||||
// Returns a copy of the stack trace recorded for `node` in trace_data.
// The node is first resolved to its id through id_map, then the id to its
// record in node_data; if either lookup misses, an empty vector is returned.
vector<Stack> get_node_trace(Node* node) {
    const auto id_entry = trace_data.id_map.find(node);
    if (id_entry != trace_data.id_map.end()) {
        const auto data_entry = trace_data.node_data.find(id_entry->second);
        if (data_entry != trace_data.node_data.end())
            return data_entry->second.stacks;
    }
    return vector<Stack>();
}
|
||||
|
||||
|
||||
} // jittor
|
||||
|
|
|
@ -67,5 +67,5 @@ struct TraceData {
|
|||
extern TraceData trace_data;
|
||||
|
||||
void print_node_trace(const Node* node, std::ostream& os);
|
||||
|
||||
vector<Stack> get_node_trace(Node* node);
|
||||
} // jittor
|
||||
|
|
Loading…
Reference in New Issue