memory_profiler

This commit is contained in:
cxjyxx_me 2021-01-05 17:35:01 +08:00
parent 1be344526c
commit d80e4056f6
6 changed files with 146 additions and 10 deletions

View File

@ -721,3 +721,90 @@ def triu_(x,diagonal=0):
return x.reindex(x.shape,indexs,overflow_conditions=overflow_conditions,overflow_value=0)
jt.Var.triu_ = triu_
def print_tree(now, max_memory_size, prefix1='', prefix2=''):
    """Recursively print a memory-usage tree node and all of its children.

    Args:
        now: tree node, a dict with the keys 'name', 'type', 'size',
            'path' (list of strings) and 'children' (list of nodes).
        max_memory_size: total used as the denominator for the percentage
            shown next to every node. When it is 0 the percentage is
            reported as 0.00% instead of raising ZeroDivisionError.
        prefix1: text printed in front of the node's title line.
        prefix2: text printed in front of every other line of the node.
    """
    def format_size(s):
        # Plain byte counts are printed without decimals.
        if s < 1024:
            return str(s) + ' B'
        # Larger values use two decimals; GB is the largest unit.
        for unit, scale in (('KB', 1024), ('MB', 1024 * 1024)):
            if s < scale * 1024:
                return format(s / scale, '.2f') + ' ' + unit
        return format(s / (1024 * 1024 * 1024), '.2f') + ' GB'

    tab = ' '
    # Guard the division: an empty profile reports a maximum of 0.
    percent = now['size'] / max_memory_size * 100 if max_memory_size else 0.0

    print(prefix1 + now['name'] + '(' + now['type'] + ')')
    print(prefix2 + '[' + format_size(now['size']) + '; ' + format(percent, '.2f') + '%]')
    for p in now['path']:
        print(prefix2 + p)

    children = now['children']
    # A connector line leads into the children; a leaf ends with a spacer line.
    if children:
        print(prefix2 + tab + '| ')
    else:
        print(prefix2)

    last = len(children) - 1
    for i, c in enumerate(children):
        if i < last:
            prefix1_ = prefix2 + tab + '├─'
            prefix2_ = prefix2 + tab + '| '
        else:
            # The last child closes the branch and stops the vertical bar.
            prefix1_ = prefix2 + tab + '└─'
            prefix2_ = prefix2 + tab + ' '
        print_tree(c, max_memory_size, prefix1_, prefix2_)
def get_max_memory_treemap():
    """Build, print and return a tree of memory usage grouped by stack frame.

    The string returned by jt.get_max_memory_info() is split on three
    delimiter tokens: the first chunk is the maximum memory size, and each
    following chunk describes one variable (its size plus a list of stack
    frames, each carrying a path, a name and a type). Variables are merged
    into a tree keyed by frame name, every node accumulating the sizes of
    the variables that pass through it. Children are sorted by size,
    largest first, before the tree is printed with print_tree.

    Returns:
        The root node: a dict with the keys 'name', 'children', 'size',
        'path' and 'type'.
    """
    var_sep = "[!@#div1!@#]"
    field_sep = "[!@#div2!@#]"
    frame_sep = "[!@#div3!@#]"

    chunks = jt.get_max_memory_info().split(var_sep)
    max_memory_size = int(chunks[0])

    # Decode every variable chunk into {'size': ..., 'stack': [...]}.
    records = []
    for chunk in chunks[1:]:
        fields = chunk.split(field_sep)
        record = {'size': int(fields[1]), 'stack': []}
        # fields[0] is not used; the final field is dropped as well.
        for raw_frame in fields[2:-1]:
            parts = raw_frame.split(frame_sep)
            record['stack'].append({'path': parts[0], 'name': parts[1], 'type': parts[2]})
        records.append(record)

    tree = {'name': 'root', "children": [], 'size': 0, 'path': [], 'type': ''}

    # Merge each variable's stack into the tree, summing sizes along the way.
    for record in records:
        node = tree
        node['size'] += record['size']
        for frame in record['stack']:
            child = next((c for c in node['children'] if c['name'] == frame['name']), None)
            if child is None:
                child = {'name': frame['name'], "children": [], 'size': record['size'],
                         'path': [frame['path']], 'type': frame['type']}
                node['children'].append(child)
            else:
                if frame['path'] not in child['path']:
                    child['path'].append(frame['path'])
                assert(child['type'] == frame['type'])
                child['size'] += record['size']
            node = child

    # Order the children of every node by size, largest first.
    pending = [tree]
    while pending:
        node = pending.pop()
        node['children'].sort(key=lambda child: child['size'], reverse=True)
        pending.extend(node['children'])

    print_tree(tree, max_memory_size, '', '')
    return tree

View File

@ -63,7 +63,7 @@ class TestMemoryProfiler(unittest.TestCase):
prev = time.time()
SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum, self.weight_decay)
iters = 50
iters = 10
for batch_idx, (data, target) in enumerate(self.train_loader):
if (batch_idx > iters):
break
@ -82,6 +82,7 @@ class TestMemoryProfiler(unittest.TestCase):
jt.fetch(batch_idx, loss, output, target, callback)
jt.sync_all(True)
jt.display_max_memory_info()
jt.get_max_memory_treemap()
if __name__ == "__main__":
unittest.main()

View File

@ -7,6 +7,7 @@
#include <algorithm>
#include <sys/sysinfo.h>
#include <sstream>
#include "pybind/py_var_tracer.h"
namespace jittor {
@ -63,7 +64,7 @@ void MemoryProfiler::check() {
allocations.clear();
size_t memory_size = 0;
vector<std::pair<string, size_t>> live_vars;
std::vector<std::pair<std::pair<string, vector<Stack>>, size_t>> live_vars;
vector<Node*> queue;
auto t = ++Node::tflag_count;
@ -77,9 +78,10 @@ void MemoryProfiler::check() {
if (node->is_var()) {
Var* var = (Var*)node;
if (var->mem_ptr != nullptr) {
vector<Stack> stacks = get_node_trace(var);
std::stringstream stream;
stream << var;
live_vars.push_back(std::make_pair(stream.str(), var->size));
live_vars.push_back(std::make_pair(std::make_pair(stream.str(), stacks), var->size));
if (!allocations.count(var->mem_ptr)) {
allocations[var->mem_ptr] = 1;
memory_size += var->size;
@ -92,7 +94,7 @@ void MemoryProfiler::check() {
}
}
bool MemoryProfiler::cmp(const std::pair<string, size_t>& a, const std::pair<string, size_t>& b) {
// Ordering predicate over live-var entries: true when `a` has the larger
// size (the outer pair's second member), so sorting puts the biggest first.
bool MemoryProfiler::cmp(const std::pair<std::pair<string, vector<Stack>>, size_t>& a, const std::pair<std::pair<string, vector<Stack>>, size_t>& b) {
    return b.second < a.second;
}
@ -104,7 +106,11 @@ void MemoryProfiler::display_max_memory_info() {
log << "max var memory" << FloatOutput_{(double)max_memory_size, " KMG", 1024, "B"} << "\n\n";
log << "[Size]" << "[Percent]" << "[Var Info]" << "\n";
for (int i = 0; i < max_live_vars.size(); ++i) {
log << FloatOutput_{(double)max_live_vars[i].second, " KMG", 1024, "B"} << double(max_live_vars[i].second) / max_memory_size * 100 << "%" << max_live_vars[i].first << "\n\n";
log << FloatOutput_{(double)max_live_vars[i].second, " KMG", 1024, "B"}
<< double(max_live_vars[i].second) / max_memory_size * 100 << "%"
<< max_live_vars[i].first.first
<< max_live_vars[i].first.second[0].file_path + ":" + std::to_string(max_live_vars[i].first.second[0].lineno)
<< "\n\n";
}
log << "=========================\n";
log.end();
@ -114,4 +120,29 @@ void display_max_memory_info() {
memory_profiler.display_max_memory_info();
}
// Serializes the recorded maximum-memory snapshot into a single string.
//
// Layout: max_memory_size, then for every entry of max_live_vars
//   div1 <var description> div2 <size> div2
// followed, for every stack frame of that entry, by
//   <file_path>:<lineno> div3 <module_name> div3 <module_type> div2
//
// max_live_vars is sorted in place with cmp before it is written out.
string MemoryProfiler::get_max_memory_info() {
    const string div1 = "[!@#div1!@#]";
    const string div2 = "[!@#div2!@#]";
    const string div3 = "[!@#div3!@#]";

    std::sort(max_live_vars.begin(), max_live_vars.end(), cmp);

    std::stringstream out;
    out << max_memory_size;
    for (const auto& entry : max_live_vars) {
        const auto& description = entry.first.first;
        const auto& stacks = entry.first.second;
        out << div1 << description << div2 << entry.second << div2;
        for (const auto& frame : stacks) {
            out << frame.file_path + ":" + std::to_string(frame.lineno) << div3
                << frame.module_name << div3
                << frame.module_type << div2;
        }
    }
    return out.str();
}
// Free-function wrapper: returns the string produced by
// get_max_memory_info() on the global memory_profiler object.
string get_max_memory_info() {
return memory_profiler.get_max_memory_info();
}
} // jittor

View File

@ -11,25 +11,29 @@
#include <vector>
#include <string>
#include "var.h"
#include "pybind/py_var_tracer.h"
namespace jittor {
// @pyjt(display_max_memory_info)
void display_max_memory_info();
// @pyjt(get_max_memory_info)
string get_max_memory_info();
// Collects memory statistics about live vars and exposes them for display
// or as a serialized string.
// NOTE(review): max_live_vars and cmp each appear twice below with different
// types; these look like the before/after lines of one change. The
// implementation code visible alongside uses the nested-pair form — confirm
// which declaration is meant to remain.
struct MemoryProfiler {
// Memory pointers recorded by check(), which uses the map to count each
// distinct pointer's size only once.
std::map<void*, size_t> allocations;
// Max Infos
std::vector<std::pair<string, size_t>> max_live_vars;
// Each entry is ((var description, stack frames), size).
vector<std::pair<std::pair<string, vector<Stack>>, size_t>> max_live_vars;
size_t max_used_memory_size;
// Denominator for the percentages printed by display_max_memory_info().
size_t max_memory_size;
MemoryProfiler();
static bool cmp(const std::pair<string, size_t>& a, const std::pair<string, size_t>& b);
// Returns true when a's size (.second) is greater than b's.
static bool cmp(const std::pair<std::pair<string, vector<Stack>>, size_t>& a, const std::pair<std::pair<string, vector<Stack>>, size_t>& b);
void clear();
void check();
std::pair<size_t, size_t> get_memory_info();
// Writes the recorded max-memory entries to the log.
void display_max_memory_info();
// Returns the recorded max-memory entries as one delimiter-separated string.
string get_max_memory_info();
};
extern MemoryProfiler memory_profiler;

View File

@ -97,7 +97,8 @@ static vector<Stack> get_stack_info() {
PyObject* prev_obj = nullptr;
if (trace_py_var >= 3) {
// trace raw stack
auto start = std::max(0, n-5);
// auto start = std::max(0, n-5);
auto start = 0;
for (int i=start; i<n; i++) {
auto f = frames[i];
auto filename = to_string(f->f_code->co_filename);
@ -185,7 +186,7 @@ void TraceData::record_node(Node* node, bool record_stack) {
NodeData data;
data.id = node_data_cnt++;
id_map[node] = data.id;
if (!node->is_var() || trace_py_var>=3) {
if (trace_py_var) {
if (record_stack) {
if (trace_grad_op) {
auto iter = trace_data.id_map.find(trace_grad_op);
@ -363,4 +364,16 @@ void print_node_trace(const Node* node, std::ostream& os) {
os << _get_stack_info((Node*)node);
}
// Returns a copy of the stack frames recorded for `node` in trace_data.
// An empty vector is returned when the node has no id in id_map, or when
// that id has no entry in node_data.
vector<Stack> get_node_trace(Node* node) {
    const auto id_it = trace_data.id_map.find(node);
    if (id_it != trace_data.id_map.end()) {
        const auto data_it = trace_data.node_data.find(id_it->second);
        if (data_it != trace_data.node_data.end())
            return data_it->second.stacks;
    }
    return vector<Stack>();
}
} // jittor

View File

@ -67,5 +67,5 @@ struct TraceData {
extern TraceData trace_data;
void print_node_trace(const Node* node, std::ostream& os);
vector<Stack> get_node_trace(Node* node);
} // jittor