mirror of https://github.com/Jittor/Jittor
add profiler mark for easy range profiling
This commit is contained in:
parent
5bc160b19c
commit
ec2eef1fd9
|
@ -9,7 +9,7 @@
|
||||||
# file 'LICENSE.txt', which is part of this source code package.
|
# file 'LICENSE.txt', which is part of this source code package.
|
||||||
# ***************************************************************
|
# ***************************************************************
|
||||||
|
|
||||||
__version__ = '1.3.5.33'
|
__version__ = '1.3.5.34'
|
||||||
from jittor_utils import lock
|
from jittor_utils import lock
|
||||||
with lock.lock_scope():
|
with lock.lock_scope():
|
||||||
ori_int = int
|
ori_int = int
|
||||||
|
@ -234,6 +234,52 @@ class profile_scope(_call_no_record_scope):
|
||||||
self.report.extend(profiler.report())
|
self.report.extend(profiler.report())
|
||||||
self.fs.__exit__(*exc)
|
self.fs.__exit__(*exc)
|
||||||
|
|
||||||
|
|
||||||
|
class profile_mark(_call_no_record_scope):
|
||||||
|
def __init__(self, mark_name: str):
|
||||||
|
''' profiler mark is used for profiling part of code,
|
||||||
|
|
||||||
|
Example::
|
||||||
|
|
||||||
|
a = jt.rand(1000,1000)
|
||||||
|
b = jt.rand(1000,1000)
|
||||||
|
jt.sync_all()
|
||||||
|
results = []
|
||||||
|
with jt.profile_scope() as rep:
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
with jt.profile_mark("mark1"):
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
with jt.profile_mark("mark2"):
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
with jt.profile_mark("mark3"):
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
|
||||||
|
Output::
|
||||||
|
|
||||||
|
Total time: 46.8ms
|
||||||
|
Total Memory Access: 57.2MB
|
||||||
|
[Mark mark3] time: 9ms
|
||||||
|
[Mark mark2] time: 8.28ms
|
||||||
|
[Mark mark1] time: 17.7ms
|
||||||
|
|
||||||
|
'''
|
||||||
|
self.mark_name = mark_name
|
||||||
|
def __enter__(self):
|
||||||
|
self.options = flags.compile_options
|
||||||
|
new_options = flags.compile_options
|
||||||
|
prev_marks = "_marks:"
|
||||||
|
for x in self.options:
|
||||||
|
if x.startswith(prev_marks):
|
||||||
|
prev_marks = x
|
||||||
|
del new_options[x]
|
||||||
|
new_marks = prev_marks + self.mark_name + ','
|
||||||
|
new_options[new_marks] = 1
|
||||||
|
flags.compile_options = new_options
|
||||||
|
|
||||||
|
def __exit__(self, *exc):
|
||||||
|
flags.compile_options = self.options
|
||||||
|
|
||||||
class __single_process_scope:
|
class __single_process_scope:
|
||||||
def __init__(self, rank=0):
|
def __init__(self, rank=0):
|
||||||
self.rank = rank
|
self.rank = rank
|
||||||
|
|
|
@ -202,8 +202,10 @@ void FusedOp::do_jit_prepare(JK& jk) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
jk << "«choices:";
|
jk << "«choices:";
|
||||||
for (auto& kv : *loop_options)
|
for (auto& kv : *loop_options) {
|
||||||
jk << kv.first << ':' << kv.second << ',';
|
if (kv.first.size() && kv.first[0] != '_')
|
||||||
|
jk << kv.first << ':' << kv.second << ',';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
jk.finilize();
|
jk.finilize();
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "profiler/memory_checker.h"
|
#include "profiler/memory_checker.h"
|
||||||
#include "misc/deleter.h"
|
#include "misc/deleter.h"
|
||||||
#include "executor.h"
|
#include "executor.h"
|
||||||
|
#include "utils/str_utils.h"
|
||||||
|
|
||||||
namespace jittor {
|
namespace jittor {
|
||||||
|
|
||||||
|
@ -55,6 +56,7 @@ void Profiler::start(int64 warmup, int64 rerun) {
|
||||||
if (rerun==0) rerun = profiler_rerun;
|
if (rerun==0) rerun = profiler_rerun;
|
||||||
profiler_enable = 1;
|
profiler_enable = 1;
|
||||||
profiler.records.clear();
|
profiler.records.clear();
|
||||||
|
profiler.marks.clear();
|
||||||
profiler.warmup = warmup;
|
profiler.warmup = warmup;
|
||||||
profiler.rerun = rerun;
|
profiler.rerun = rerun;
|
||||||
profiler.relay_extra_cost = 0;
|
profiler.relay_extra_cost = 0;
|
||||||
|
@ -197,6 +199,34 @@ struct RecordExtraCost {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static string get_marks(Op* op, bool is_fused) {
|
||||||
|
loop_options_t* options = nullptr;
|
||||||
|
if (is_fused) {
|
||||||
|
auto* fop = (FusedOp*)op;
|
||||||
|
options = fop->loop_options;
|
||||||
|
} else {
|
||||||
|
if (op->outputs().size()) {
|
||||||
|
if (op->outputs().front()->loop_options)
|
||||||
|
options = &op->outputs().front()->loop_options.data();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!options) return string();
|
||||||
|
for (auto& kv : *options) {
|
||||||
|
if (startswith(kv.first, "_marks:")) {
|
||||||
|
return kv.first.substr(7);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return string();
|
||||||
|
}
|
||||||
|
|
||||||
|
static string origin_key(const string& s) {
|
||||||
|
if (s.size() && s[0]=='[') {
|
||||||
|
auto vs = split(s, "]");
|
||||||
|
return vs[vs.size()-1];
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
void Profiler::record_and_run(
|
void Profiler::record_and_run(
|
||||||
jit_op_entry_t jit_entry,
|
jit_op_entry_t jit_entry,
|
||||||
Op* op,
|
Op* op,
|
||||||
|
@ -208,6 +238,18 @@ void Profiler::record_and_run(
|
||||||
auto ikey=jit_key_mapper.find(jit_key);
|
auto ikey=jit_key_mapper.find(jit_key);
|
||||||
const char* key = ikey==jit_key_mapper.end() ?
|
const char* key = ikey==jit_key_mapper.end() ?
|
||||||
jit_key : ikey->second.c_str();
|
jit_key : ikey->second.c_str();
|
||||||
|
bool is_fused = op->name() == string("fused");
|
||||||
|
string marks = get_marks(op, is_fused);
|
||||||
|
string new_key;
|
||||||
|
if (marks.size()) {
|
||||||
|
// add marks into key, for better report
|
||||||
|
new_key = string("[marks:");
|
||||||
|
new_key += marks;
|
||||||
|
new_key += "]";
|
||||||
|
new_key += key;
|
||||||
|
key = new_key.c_str();
|
||||||
|
}
|
||||||
|
|
||||||
auto iter = profiler.records.find(key);
|
auto iter = profiler.records.find(key);
|
||||||
uint64_t in, out, compute;
|
uint64_t in, out, compute;
|
||||||
if (profiler.relay_fop)
|
if (profiler.relay_fop)
|
||||||
|
@ -224,7 +266,6 @@ void Profiler::record_and_run(
|
||||||
iter->second.stack_info = get_stack_info(op);
|
iter->second.stack_info = get_stack_info(op);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool is_fused = op->name() == string("fused");
|
|
||||||
|
|
||||||
uint64* shape_time = nullptr;
|
uint64* shape_time = nullptr;
|
||||||
if (trace_py_var || profiler_record_shape) {
|
if (trace_py_var || profiler_record_shape) {
|
||||||
|
@ -306,6 +347,20 @@ void Profiler::record_and_run(
|
||||||
total_ns = std::max((int64_t)1, total_ns-24);
|
total_ns = std::max((int64_t)1, total_ns-24);
|
||||||
iter->second.update(rerun, total_ns, in, out, compute);
|
iter->second.update(rerun, total_ns, in, out, compute);
|
||||||
if (shape_time) shape_time[0] += total_ns;
|
if (shape_time) shape_time[0] += total_ns;
|
||||||
|
|
||||||
|
// add markers record
|
||||||
|
if ((profiler.relay_fop == op || profiler.relay_fop == nullptr)
|
||||||
|
&& marks.size()) {
|
||||||
|
// only record not relay op
|
||||||
|
auto vs = split(marks, ",");
|
||||||
|
for (auto& mark : vs) {
|
||||||
|
if (mark.size()) {
|
||||||
|
auto& mark_info = profiler.marks[mark];
|
||||||
|
mark_info.count += 1;
|
||||||
|
mark_info.time_total += total_ns;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RecordExtraCost rec(profiler.relay_fop && profiler.relay_fop != op);
|
RecordExtraCost rec(profiler.relay_fop && profiler.relay_fop != op);
|
||||||
if (profiler_record_peek)
|
if (profiler_record_peek)
|
||||||
|
@ -313,7 +368,7 @@ void Profiler::record_and_run(
|
||||||
LOGvvvv << "Duration" << total_ns >> "ns running" << op;
|
LOGvvvv << "Duration" << total_ns >> "ns running" << op;
|
||||||
if (is_fused &&
|
if (is_fused &&
|
||||||
((FusedOp*)op)->get_loop_option("check_cache")) {
|
((FusedOp*)op)->get_loop_option("check_cache")) {
|
||||||
auto fname = Op::get_filename_from_jit_key(key, ".so");
|
auto fname = Op::get_filename_from_jit_key(origin_key(key), ".so");
|
||||||
unique_ptr<MemoryChecker>* mc = load_memory_checker(fname);
|
unique_ptr<MemoryChecker>* mc = load_memory_checker(fname);
|
||||||
iter->second.cache_info.reset(new CacheInfo(mc));
|
iter->second.cache_info.reset(new CacheInfo(mc));
|
||||||
}
|
}
|
||||||
|
@ -337,7 +392,7 @@ vector<vector<string>> Profiler::report(const string& sort_key) {
|
||||||
for (auto& kv : profiler.records) {
|
for (auto& kv : profiler.records) {
|
||||||
auto& kinfo = kv.second;
|
auto& kinfo = kv.second;
|
||||||
names.push_back(kv.first);
|
names.push_back(kv.first);
|
||||||
fnames.push_back(Op::get_filename_from_jit_key(kv.first, ".cc"));
|
fnames.push_back(Op::get_filename_from_jit_key(origin_key(kv.first), ".cc"));
|
||||||
if (kv.second.stack_info.size()) {
|
if (kv.second.stack_info.size()) {
|
||||||
fnames.back() += '\n';
|
fnames.back() += '\n';
|
||||||
fnames.back() += kv.second.stack_info.c_str();
|
fnames.back() += kv.second.stack_info.c_str();
|
||||||
|
@ -420,6 +475,11 @@ vector<vector<string>> Profiler::report(const string& sort_key) {
|
||||||
ss << "Total Memory Access:";
|
ss << "Total Memory Access:";
|
||||||
output_float(" KMG", 1024, "B", total_mem_access);
|
output_float(" KMG", 1024, "B", total_mem_access);
|
||||||
ss << '\n';
|
ss << '\n';
|
||||||
|
for (auto& mark : profiler.marks) {
|
||||||
|
ss << "[Mark " << mark.first << "] time:";
|
||||||
|
output_float("num ", 1000, "s", mark.second.time_total);
|
||||||
|
ss << '\n';
|
||||||
|
}
|
||||||
double cum_time = 0;
|
double cum_time = 0;
|
||||||
for (auto i : order) {
|
for (auto i : order) {
|
||||||
auto& name = names[i];
|
auto& name = names[i];
|
||||||
|
@ -491,7 +551,7 @@ vector<vector<string>> Profiler::report_cache(const string& sort_key) {
|
||||||
if (!kv.second.cache_info)
|
if (!kv.second.cache_info)
|
||||||
continue;
|
continue;
|
||||||
names.push_back(kv.first);
|
names.push_back(kv.first);
|
||||||
fnames.push_back(Op::get_filename_from_jit_key(kv.first, ".cc"));
|
fnames.push_back(Op::get_filename_from_jit_key(origin_key(kv.first), ".cc"));
|
||||||
CacheInfo& kinfo = *kv.second.cache_info;
|
CacheInfo& kinfo = *kv.second.cache_info;
|
||||||
order.push_back(order.size());
|
order.push_back(order.size());
|
||||||
vector<double> one_info = {(double)kinfo.check_times, ((double)kinfo.tlb_miss_times) / kinfo.check_times};
|
vector<double> one_info = {(double)kinfo.check_times, ((double)kinfo.tlb_miss_times) / kinfo.check_times};
|
||||||
|
|
|
@ -61,6 +61,11 @@ struct Profiler {
|
||||||
int64 relay_extra_cost;
|
int64 relay_extra_cost;
|
||||||
FusedOp* relay_fop;
|
FusedOp* relay_fop;
|
||||||
|
|
||||||
|
struct MarkInfo {
|
||||||
|
uint64_t count, time_total;
|
||||||
|
};
|
||||||
|
unordered_map<string, Info> marks;
|
||||||
|
|
||||||
~Profiler();
|
~Profiler();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -20,5 +20,23 @@ class TestProfiler(unittest.TestCase):
|
||||||
y = float(rep[-2][4])
|
y = float(rep[-2][4])
|
||||||
assert abs(x-y)/x < 1e-3
|
assert abs(x-y)/x < 1e-3
|
||||||
|
|
||||||
|
def test_marks(self):
|
||||||
|
a = jt.rand(1000,1000)
|
||||||
|
b = jt.rand(1000,1000)
|
||||||
|
jt.sync_all()
|
||||||
|
results = []
|
||||||
|
with jt.profile_scope() as rep:
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
with jt.profile_mark("mark1"):
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
with jt.profile_mark("mark2"):
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
with jt.profile_mark("mark3"):
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
results.append(jt.matmul(a, b))
|
||||||
|
jt.sync_all()
|
||||||
|
assert len(rep) == 6
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
Loading…
Reference in New Issue