add meminfo and grad warning

This commit is contained in:
Dun Liang 2020-05-11 13:26:23 +08:00
parent c657491a51
commit 1c19d5837d
8 changed files with 175 additions and 102 deletions

View File

@ -122,7 +122,7 @@ vector<VarPtr> grad(Var* loss, vector<Var*> targets) {
if (var->tflag == nt)
grad = move(grads[var->custom_data]);
if (!grad) {
LOGvvv << var << "grads[">>i>>"] set to zero";
LOGw << "grads[">>i>>"] doesn't have gradient. It will be set to zero:" << var;
grad = make_number(0.f, var);
assign_attrs(grad.ptr, var);
registe_node_trace_grad(grad.ptr, var, 0);

View File

@ -4,12 +4,6 @@
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include <typeinfo>
#include <iomanip>
#include <sys/sysinfo.h>
#include "var.h"
#include "op.h"
#include "var_holder.h"
#include "misc/cuda_flags.h"
#include "mem/allocator/aligned_allocator.h"
@ -92,67 +86,4 @@ void gc_all() {
for (auto& kv : allocators) kv.second->gc();
}
/**
 * A number that should be printed in a compact, unit-scaled form.
 *
 *  value  - the raw quantity to print.
 *  scale  - one unit character per magnitude step, smallest first
 *           (callers in this file pass " KMG").
 *  base   - ratio between two consecutive unit steps (1024 or 1000 here).
 *  suffix - text appended after the unit character (e.g. "B").
 *  p      - number of significant digits used for the scaled value.
 */
struct FloatOutput {
    double value;
    std::string scale;
    int base;
    std::string suffix;
    int p=4;
};

/**
 * Print `o.value` divided down by `o.base` until it is smaller than `o.base`
 * or the last unit in `o.scale` has been reached, followed by the selected
 * unit character and `o.suffix`.
 *
 * The numeric field is right-aligned in (8 - 2 - suffix length) columns.
 * The stream's precision is restored before returning, so printing a
 * FloatOutput no longer changes how later floating point values are shown.
 */
std::ostream& operator<<(std::ostream& os, const FloatOutput& o) {
    const int total_width = 8;
    // Computed in signed arithmetic so a long suffix cannot wrap around.
    const int field_width = total_width - 2 - static_cast<int>(o.suffix.size());

    // Select the unit: keep dividing while another unit is available and the
    // value is not yet below the base.
    size_t unit = 0;
    double scaled = o.value;
    while (unit + 1 < o.scale.size() && !(scaled < o.base)) {
        scaled /= o.base;
        ++unit;
    }

    // precision() returns the previous setting; put it back after printing.
    const std::streamsize saved_precision = os.precision(o.p);
    os << std::setw(field_width > 0 ? field_width : 0) << scaled;
    os.precision(saved_precision);

    // With an empty scale there is no unit character to print; indexing the
    // string would emit a NUL byte.
    if (!o.scale.empty())
        os << o.scale[unit];
    return os << o.suffix;
}
/**
 * Write a multi-line memory report to the log.
 *
 * The report contains the number of held vars and of live Var/Op objects,
 * the stat allocator counters when `use_stat_allocator` is set, and one line
 * per entry of `SFRLAllocator::sfrl_allocators` with its used, unused and
 * total memory.
 *
 * fileline - passed through to the Log constructor (presumably the caller's
 *            file:line tag - TODO confirm against Log).
 */
void display_memory_info(const char* fileline) {
    int p = 2;  // significant digits for the percentages below
    Log log(fileline, 'i', 0);
    log << "\n=== display_memory_info ===\n";
    log << "hold_vars:" << VarHolder::hold_vars.size()
        << "lived_vars:" << Var::number_of_lived_vars
        << "lived_ops:" << Op::number_of_lived_ops >> '\n';
    if (use_stat_allocator) {
        log << "stat:" << use_stat_allocator;
        // Outstanding bytes/calls = allocated minus freed.
        log << "total alloc:" << FloatOutput{(double)(stat_allocator_total_alloc_byte
                - stat_allocator_total_free_byte), " KMG", 1024, "B"};
        log << "total alloc call:" << FloatOutput{(double)(stat_allocator_total_alloc_call
                - stat_allocator_total_free_call), " KMG", 1000, ""} >> '\n';
    }
    for (auto& a : SFRLAllocator::sfrl_allocators) {
        auto total = a->used_memory + a->unused_memory;
        // An allocator that holds no memory at all has total == 0; report 0%
        // instead of dividing by zero and printing NaN.
        double used_percent = 0.0;
        double unused_percent = 0.0;
        if (total != 0) {
            used_percent = a->used_memory*100.0 / total;
            unused_percent = a->unused_memory*100.0 / total;
        }
        log << "name:" << a->name() << "is_cuda:" << a->is_cuda()
            << "used:" << FloatOutput{(double)a->used_memory, " KMG", 1024, "B"}
                >> "(" >> std::setprecision(p) >> used_percent >> "%)"
            << "unused:" << FloatOutput{(double)a->unused_memory, " KMG", 1024, "B"}
                >> "(" >> std::setprecision(p) >> unused_percent >> "%)"
            << "total:" << FloatOutput{(double)total, " KMG", 1024, "B"} >> "\n";
    }
    log >> "===========================\n";
    log.end();
}
/**
 * Query the total host RAM and, when built with HAS_CUDA, the total global
 * memory of CUDA device 0. A value stays 0 when its query fails.
 */
MemInfo::MemInfo() {
    total_cpu_ram = 0;
    total_cuda_ram = 0;

    struct sysinfo info = {0};
    if (sysinfo(&info) == 0) {
        // sysinfo reports memory sizes as multiples of mem_unit bytes, so the
        // raw totalram field is only a byte count when mem_unit happens to be 1.
        const int64 unit = info.mem_unit ? static_cast<int64>(info.mem_unit) : 1;
        total_cpu_ram = static_cast<int64>(info.totalram) * unit;
    }

    #ifdef HAS_CUDA
    cudaDeviceProp prop = {0};
    if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
        total_cuda_ram = prop.totalGlobalMem;
    #endif
}

// Global snapshot; being a namespace-scope object, it is constructed (and the
// queries above are run) during static initialization.
MemInfo mem_info;
} // jittor

View File

@ -5,6 +5,7 @@
// ***************************************************************
#pragma once
#include "common.h"
#include "mem/mem_info.h"
namespace jittor {
@ -51,24 +52,4 @@ Allocator* get_allocator();
// @pyjt(gc)
void gc_all();
// @pyjt(display_memory_info)
void display_memory_info(const char* fileline="");
// Total memory sizes of the machine, filled in once by the default
// constructor. The `// @pyjt(...)` lines are binding annotations and must stay
// directly above the declaration they tag.
// @pyjt(MemInfo)
struct MemInfo {
// Total host RAM as obtained from sysinfo() in MemInfo::MemInfo().
// @pyjt(total_cpu_ram)
int64 total_cpu_ram;
// Total global memory of CUDA device 0; 0 when built without HAS_CUDA.
// @pyjt(total_cuda_ram)
int64 total_cuda_ram;
// Copying is allowed; get_mem_info() returns this struct by value.
inline MemInfo(const MemInfo&) = default;
// Queries the system; defined in the .cc file.
MemInfo();
};
extern MemInfo mem_info;
// Returns a copy of the global `mem_info` object.
// @pyjt(get_mem_info)
inline MemInfo get_mem_info() {
    return mem_info;
}
} // jittor

110
src/mem/mem_info.cc Normal file
View File

@ -0,0 +1,110 @@
// ***************************************************************
// Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include <iomanip>
#include <algorithm>
#include <sys/sysinfo.h>
#include "var.h"
#include "op.h"
#include "var_holder.h"
#include "graph.h"
#include "misc/cuda_flags.h"
#include "mem/allocator/sfrl_allocator.h"
#include "mem/allocator/stat_allocator.h"
#include "mem/mem_info.h"
namespace jittor {
/**
 * A number that should be printed in a compact, unit-scaled form.
 *
 *  value  - the raw quantity to print.
 *  scale  - one unit character per magnitude step, smallest first
 *           (callers in this file pass " KMG").
 *  base   - ratio between two consecutive unit steps (1024 or 1000 here).
 *  suffix - text appended after the unit character (e.g. "B").
 *  p      - number of significant digits used for the scaled value.
 */
struct FloatOutput {
    double value;
    std::string scale;
    int base;
    std::string suffix;
    int p=4;
};

/**
 * Print `o.value` divided down by `o.base` until it is smaller than `o.base`
 * or the last unit in `o.scale` has been reached, followed by the selected
 * unit character and `o.suffix`.
 *
 * The numeric field is right-aligned in (8 - 2 - suffix length) columns.
 * The stream's precision is restored before returning, so printing a
 * FloatOutput no longer changes how later floating point values are shown.
 */
std::ostream& operator<<(std::ostream& os, const FloatOutput& o) {
    const int total_width = 8;
    // Computed in signed arithmetic so a long suffix cannot wrap around.
    const int field_width = total_width - 2 - static_cast<int>(o.suffix.size());

    // Select the unit: keep dividing while another unit is available and the
    // value is not yet below the base.
    size_t unit = 0;
    double scaled = o.value;
    while (unit + 1 < o.scale.size() && !(scaled < o.base)) {
        scaled /= o.base;
        ++unit;
    }

    // precision() returns the previous setting; put it back after printing.
    const std::streamsize saved_precision = os.precision(o.p);
    os << std::setw(field_width > 0 ? field_width : 0) << scaled;
    os.precision(saved_precision);

    // With an empty scale there is no unit character to print; indexing the
    // string would emit a NUL byte.
    if (!o.scale.empty())
        os << o.scale[unit];
    return os << o.suffix;
}
/**
 * Write a multi-line memory report to the log.
 *
 * The report contains the total CPU/CUDA RAM from `mem_info`, the number of
 * held vars and of live Var/Op objects, (with NODE_MEMCHECK) the ten nodes
 * with the smallest ids, the stat allocator counters when
 * `use_stat_allocator` is set, and one line per entry of
 * `SFRLAllocator::sfrl_allocators` with its used, unused and total memory.
 *
 * fileline - passed through to the Log constructor (presumably the caller's
 *            file:line tag - TODO confirm against Log).
 */
void display_memory_info(const char* fileline) {
    int p = 3;  // significant digits for the percentages below
    Log log(fileline, 'i', 0);
    log << "\n=== display_memory_info ===\n";
    log << "total_cpu_ram:" <<
        FloatOutput{(double)mem_info.total_cpu_ram, " KMG", 1024, "B"};
    log << "total_cuda_ram:" <<
        FloatOutput{(double)mem_info.total_cuda_ram, " KMG", 1024, "B"} >> "\n";
    log << "hold_vars:" << VarHolder::hold_vars.size()
        << "lived_vars:" << Var::number_of_lived_vars
        << "lived_ops:" << Op::number_of_lived_ops >> '\n';

    #ifdef NODE_MEMCHECK
    // get the oldest var
    vector<Node*> queue;
    // A fresh tflag value marks vars already queued, so each var is pushed
    // only once even if several holders refer to it.
    auto t = ++Node::tflag_count;
    for (auto& vh : VarHolder::hold_vars)
        if (vh->var->tflag != t) {
            vh->var->tflag = t;
            queue.push_back(vh->var);
        }
    // Presumably extends `queue` with every node reachable from the held
    // vars in either direction - TODO confirm against bfs_both.
    bfs_both(queue, [](Node*){return true;});
    vector<pair<int64, Node*>> nodes;
    nodes.reserve(queue.size());
    for (auto* node : queue)
        nodes.push_back({node->__id(), node});
    // Ascending by id; the smallest ids are reported as the oldest nodes.
    std::sort(nodes.begin(), nodes.end());
    log << "list of the oldest nodes:\n";
    // size_t index avoids the signed/unsigned comparison with nodes.size().
    for (size_t i=0; i<10 && i<nodes.size(); i++) {
        log << "ID#" >> nodes[i].first >> ":" << nodes[i].second << "\n";
    }
    #endif

    if (use_stat_allocator) {
        log << "stat:" << use_stat_allocator;
        // Outstanding bytes/calls = allocated minus freed.
        log << "total alloc:" << FloatOutput{(double)(stat_allocator_total_alloc_byte
                - stat_allocator_total_free_byte), " KMG", 1024, "B"};
        log << "total alloc call:" << FloatOutput{(double)(stat_allocator_total_alloc_call
                - stat_allocator_total_free_call), " KMG", 1000, ""} >> '\n';
    }
    for (auto& a : SFRLAllocator::sfrl_allocators) {
        auto total = a->used_memory + a->unused_memory;
        // An allocator that holds no memory at all has total == 0; report 0%
        // instead of dividing by zero and printing NaN.
        double used_percent = 0.0;
        double unused_percent = 0.0;
        if (total != 0) {
            used_percent = a->used_memory*100.0 / total;
            unused_percent = a->unused_memory*100.0 / total;
        }
        log << "name:" << a->name() << "is_cuda:" << a->is_cuda()
            << "used:" << FloatOutput{(double)a->used_memory, " KMG", 1024, "B"}
                >> "(" >> std::setprecision(p) >> used_percent >> "%)"
            << "unused:" << FloatOutput{(double)a->unused_memory, " KMG", 1024, "B"}
                >> "(" >> std::setprecision(p) >> unused_percent >> "%)"
            << "total:" << FloatOutput{(double)total, " KMG", 1024, "B"} >> "\n";
    }
    log >> "===========================\n";
    log.end();
}
/**
 * Query the total host RAM and, when built with HAS_CUDA, the total global
 * memory of CUDA device 0. A value stays 0 when its query fails.
 */
MemInfo::MemInfo() {
    total_cpu_ram = 0;
    total_cuda_ram = 0;

    struct sysinfo info = {0};
    if (sysinfo(&info) == 0) {
        // sysinfo reports memory sizes as multiples of mem_unit bytes, so the
        // raw totalram field is only a byte count when mem_unit happens to be 1.
        const int64 unit = info.mem_unit ? static_cast<int64>(info.mem_unit) : 1;
        total_cpu_ram = static_cast<int64>(info.totalram) * unit;
    }

    #ifdef HAS_CUDA
    cudaDeviceProp prop = {0};
    if (cudaGetDeviceProperties(&prop, 0) == cudaSuccess)
        total_cuda_ram = prop.totalGlobalMem;
    #endif
}

// Global snapshot; being a namespace-scope object, it is constructed (and the
// queries above are run) during static initialization.
MemInfo mem_info;
} // jittor

31
src/mem/mem_info.h Normal file
View File

@ -0,0 +1,31 @@
// ***************************************************************
// Copyright (c) 2020 Jittor. Authors: Dun Liang <randonlang@gmail.com>. All Rights Reserved.
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#pragma once
#include "common.h"
namespace jittor {
// @pyjt(display_memory_info)
void display_memory_info(const char* fileline="");
// Total memory sizes of the machine, filled in once by the default
// constructor. The `// @pyjt(...)` lines are binding annotations and must stay
// directly above the declaration they tag.
// @pyjt(MemInfo)
struct MemInfo {
// Total host RAM as obtained from sysinfo() in MemInfo::MemInfo();
// display_memory_info() prints it with a "B" suffix.
// @pyjt(total_cpu_ram)
int64 total_cpu_ram;
// Total global memory of CUDA device 0; 0 when built without HAS_CUDA.
// @pyjt(total_cuda_ram)
int64 total_cuda_ram;
// Copying is allowed; get_mem_info() returns this struct by value.
inline MemInfo(const MemInfo&) = default;
// Queries the system; defined in mem_info.cc.
MemInfo();
};
extern MemInfo mem_info;
// Returns a copy of the global `mem_info` object.
// @pyjt(get_mem_info)
inline MemInfo get_mem_info() {
    return mem_info;
}
} // jittor

View File

@ -15,23 +15,33 @@ using namespace pybind11::literals;
namespace jittor {
DEFINE_FLAG(int, trace_py_var, 0, "Trace py stack for debug.");
DEFINE_FLAG(int, trace_py_var, 0, "Trace py stack max depth for debug.");
unordered_map<const Node*, string> trace_data;
// Stores a textual description of the current Python call stack in
// `trace_data`, keyed by `node`. The stack is obtained from Python's
// traceback.extract_stack.
// NOTE(review): this hunk shows removed and added lines interleaved; the
// single-frame `basename` code and the `py_stacks` loop are two versions of
// the same body, not one compilable function.
void __registe_node_trace(Node* node) {
auto py_stack =
auto py_stacks =
py::module::import("traceback")
.attr("extract_stack")(nullptr, 1).attr("__getitem__")(0);
auto filename = py_stack.attr("filename").cast<string>();
auto basename = split(filename, "/").back();
basename += ':';
basename += py_stack.attr("name").cast<string>();
basename += ':';
basename += S(py_stack.attr("lineno").cast<int>());
basename += ':';
basename += py_stack.attr("line").cast<string>();
trace_data[node] = basename;
.attr("extract_stack")(nullptr, trace_py_var);
auto len = py_stacks.attr("__len__")().cast<int>();
string info;
// One "file:name:lineno:line" entry is appended per captured frame.
for (int i=0; i<len; i++) {
auto py_stack = py_stacks.attr("__getitem__")(i);
auto filename = py_stack.attr("filename").cast<string>();
// A single frame keeps only the part of the path after the last "/";
// several frames each start on a new line with the full filename.
if (len==1)
info += split(filename, "/").back();
else {
info += "\n ";
info += filename;
}
info += ':';
info += py_stack.attr("name").cast<string>();
info += ':';
info += S(py_stack.attr("lineno").cast<int>());
info += ':';
info += py_stack.attr("line").cast<string>();
}
trace_data[node] = info;
}
void __unregiste_node_trace(Node* node) {

View File

@ -3,6 +3,7 @@
// This file is subject to the terms and conditions defined in
// file 'LICENSE.txt', which is part of this source code package.
// ***************************************************************
#include <sstream>
#ifdef HAS_CUDA
#include <cuda_runtime.h>
#include <helper_cuda.h>
@ -122,4 +123,10 @@ vector<ArrayArgs> fetch_sync(const vector<VarHolder*>& vh) {
return ret;
}
// Returns whatever streaming the held `var` pointer into an output stream
// produces, as a string.
string VarHolder::debug_msg() {
    std::ostringstream rendered;
    rendered << var;
    return rendered.str();
}
} // jittor

View File

@ -154,6 +154,9 @@ struct VarHolder {
#endif
std::memcpy(var->mem_ptr, array.ptr, size);
}
// @pyjt(debug_msg)
string debug_msg();
};
// @pyjt(sync)