add profiler mark for easy range profiling

This commit is contained in:
Dun Liang 2022-11-16 16:05:27 +08:00
parent 5bc160b19c
commit ec2eef1fd9
5 changed files with 138 additions and 7 deletions

View File

@ -9,7 +9,7 @@
# file 'LICENSE.txt', which is part of this source code package. # file 'LICENSE.txt', which is part of this source code package.
# *************************************************************** # ***************************************************************
__version__ = '1.3.5.33' __version__ = '1.3.5.34'
from jittor_utils import lock from jittor_utils import lock
with lock.lock_scope(): with lock.lock_scope():
ori_int = int ori_int = int
@ -234,6 +234,52 @@ class profile_scope(_call_no_record_scope):
self.report.extend(profiler.report()) self.report.extend(profiler.report())
self.fs.__exit__(*exc) self.fs.__exit__(*exc)
class profile_mark(_call_no_record_scope):
def __init__(self, mark_name: str):
''' profiler mark is used for profiling part of code,
Example::
a = jt.rand(1000,1000)
b = jt.rand(1000,1000)
jt.sync_all()
results = []
with jt.profile_scope() as rep:
results.append(jt.matmul(a, b))
with jt.profile_mark("mark1"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark2"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark3"):
results.append(jt.matmul(a, b))
results.append(jt.matmul(a, b))
Output::
Total time: 46.8ms
Total Memory Access: 57.2MB
[Mark mark3] time: 9ms
[Mark mark2] time: 8.28ms
[Mark mark1] time: 17.7ms
'''
self.mark_name = mark_name
def __enter__(self):
self.options = flags.compile_options
new_options = flags.compile_options
prev_marks = "_marks:"
for x in self.options:
if x.startswith(prev_marks):
prev_marks = x
del new_options[x]
new_marks = prev_marks + self.mark_name + ','
new_options[new_marks] = 1
flags.compile_options = new_options
def __exit__(self, *exc):
flags.compile_options = self.options
class __single_process_scope: class __single_process_scope:
def __init__(self, rank=0): def __init__(self, rank=0):
self.rank = rank self.rank = rank

View File

@ -202,8 +202,10 @@ void FusedOp::do_jit_prepare(JK& jk) {
} }
} }
jk << "«choices:"; jk << "«choices:";
for (auto& kv : *loop_options) for (auto& kv : *loop_options) {
jk << kv.first << ':' << kv.second << ','; if (kv.first.size() && kv.first[0] != '_')
jk << kv.first << ':' << kv.second << ',';
}
} }
jk.finilize(); jk.finilize();
} }

View File

@ -24,6 +24,7 @@
#include "profiler/memory_checker.h" #include "profiler/memory_checker.h"
#include "misc/deleter.h" #include "misc/deleter.h"
#include "executor.h" #include "executor.h"
#include "utils/str_utils.h"
namespace jittor { namespace jittor {
@ -55,6 +56,7 @@ void Profiler::start(int64 warmup, int64 rerun) {
if (rerun==0) rerun = profiler_rerun; if (rerun==0) rerun = profiler_rerun;
profiler_enable = 1; profiler_enable = 1;
profiler.records.clear(); profiler.records.clear();
profiler.marks.clear();
profiler.warmup = warmup; profiler.warmup = warmup;
profiler.rerun = rerun; profiler.rerun = rerun;
profiler.relay_extra_cost = 0; profiler.relay_extra_cost = 0;
@ -197,6 +199,34 @@ struct RecordExtraCost {
} }
}; };
static string get_marks(Op* op, bool is_fused) {
loop_options_t* options = nullptr;
if (is_fused) {
auto* fop = (FusedOp*)op;
options = fop->loop_options;
} else {
if (op->outputs().size()) {
if (op->outputs().front()->loop_options)
options = &op->outputs().front()->loop_options.data();
}
}
if (!options) return string();
for (auto& kv : *options) {
if (startswith(kv.first, "_marks:")) {
return kv.first.substr(7);
}
}
return string();
}
static string origin_key(const string& s) {
if (s.size() && s[0]=='[') {
auto vs = split(s, "]");
return vs[vs.size()-1];
}
return s;
}
void Profiler::record_and_run( void Profiler::record_and_run(
jit_op_entry_t jit_entry, jit_op_entry_t jit_entry,
Op* op, Op* op,
@ -208,6 +238,18 @@ void Profiler::record_and_run(
auto ikey=jit_key_mapper.find(jit_key); auto ikey=jit_key_mapper.find(jit_key);
const char* key = ikey==jit_key_mapper.end() ? const char* key = ikey==jit_key_mapper.end() ?
jit_key : ikey->second.c_str(); jit_key : ikey->second.c_str();
bool is_fused = op->name() == string("fused");
string marks = get_marks(op, is_fused);
string new_key;
if (marks.size()) {
// add marks into key, for better report
new_key = string("[marks:");
new_key += marks;
new_key += "]";
new_key += key;
key = new_key.c_str();
}
auto iter = profiler.records.find(key); auto iter = profiler.records.find(key);
uint64_t in, out, compute; uint64_t in, out, compute;
if (profiler.relay_fop) if (profiler.relay_fop)
@ -224,7 +266,6 @@ void Profiler::record_and_run(
iter->second.stack_info = get_stack_info(op); iter->second.stack_info = get_stack_info(op);
} }
} }
bool is_fused = op->name() == string("fused");
uint64* shape_time = nullptr; uint64* shape_time = nullptr;
if (trace_py_var || profiler_record_shape) { if (trace_py_var || profiler_record_shape) {
@ -306,6 +347,20 @@ void Profiler::record_and_run(
total_ns = std::max((int64_t)1, total_ns-24); total_ns = std::max((int64_t)1, total_ns-24);
iter->second.update(rerun, total_ns, in, out, compute); iter->second.update(rerun, total_ns, in, out, compute);
if (shape_time) shape_time[0] += total_ns; if (shape_time) shape_time[0] += total_ns;
// add markers record
if ((profiler.relay_fop == op || profiler.relay_fop == nullptr)
&& marks.size()) {
// only record not relay op
auto vs = split(marks, ",");
for (auto& mark : vs) {
if (mark.size()) {
auto& mark_info = profiler.marks[mark];
mark_info.count += 1;
mark_info.time_total += total_ns;
}
}
}
RecordExtraCost rec(profiler.relay_fop && profiler.relay_fop != op); RecordExtraCost rec(profiler.relay_fop && profiler.relay_fop != op);
if (profiler_record_peek) if (profiler_record_peek)
@ -313,7 +368,7 @@ void Profiler::record_and_run(
LOGvvvv << "Duration" << total_ns >> "ns running" << op; LOGvvvv << "Duration" << total_ns >> "ns running" << op;
if (is_fused && if (is_fused &&
((FusedOp*)op)->get_loop_option("check_cache")) { ((FusedOp*)op)->get_loop_option("check_cache")) {
auto fname = Op::get_filename_from_jit_key(key, ".so"); auto fname = Op::get_filename_from_jit_key(origin_key(key), ".so");
unique_ptr<MemoryChecker>* mc = load_memory_checker(fname); unique_ptr<MemoryChecker>* mc = load_memory_checker(fname);
iter->second.cache_info.reset(new CacheInfo(mc)); iter->second.cache_info.reset(new CacheInfo(mc));
} }
@ -337,7 +392,7 @@ vector<vector<string>> Profiler::report(const string& sort_key) {
for (auto& kv : profiler.records) { for (auto& kv : profiler.records) {
auto& kinfo = kv.second; auto& kinfo = kv.second;
names.push_back(kv.first); names.push_back(kv.first);
fnames.push_back(Op::get_filename_from_jit_key(kv.first, ".cc")); fnames.push_back(Op::get_filename_from_jit_key(origin_key(kv.first), ".cc"));
if (kv.second.stack_info.size()) { if (kv.second.stack_info.size()) {
fnames.back() += '\n'; fnames.back() += '\n';
fnames.back() += kv.second.stack_info.c_str(); fnames.back() += kv.second.stack_info.c_str();
@ -420,6 +475,11 @@ vector<vector<string>> Profiler::report(const string& sort_key) {
ss << "Total Memory Access:"; ss << "Total Memory Access:";
output_float(" KMG", 1024, "B", total_mem_access); output_float(" KMG", 1024, "B", total_mem_access);
ss << '\n'; ss << '\n';
for (auto& mark : profiler.marks) {
ss << "[Mark " << mark.first << "] time:";
output_float("num ", 1000, "s", mark.second.time_total);
ss << '\n';
}
double cum_time = 0; double cum_time = 0;
for (auto i : order) { for (auto i : order) {
auto& name = names[i]; auto& name = names[i];
@ -491,7 +551,7 @@ vector<vector<string>> Profiler::report_cache(const string& sort_key) {
if (!kv.second.cache_info) if (!kv.second.cache_info)
continue; continue;
names.push_back(kv.first); names.push_back(kv.first);
fnames.push_back(Op::get_filename_from_jit_key(kv.first, ".cc")); fnames.push_back(Op::get_filename_from_jit_key(origin_key(kv.first), ".cc"));
CacheInfo& kinfo = *kv.second.cache_info; CacheInfo& kinfo = *kv.second.cache_info;
order.push_back(order.size()); order.push_back(order.size());
vector<double> one_info = {(double)kinfo.check_times, ((double)kinfo.tlb_miss_times) / kinfo.check_times}; vector<double> one_info = {(double)kinfo.check_times, ((double)kinfo.tlb_miss_times) / kinfo.check_times};

View File

@ -61,6 +61,11 @@ struct Profiler {
int64 relay_extra_cost; int64 relay_extra_cost;
FusedOp* relay_fop; FusedOp* relay_fop;
struct MarkInfo {
uint64_t count, time_total;
};
unordered_map<string, Info> marks;
~Profiler(); ~Profiler();
}; };

View File

@ -20,5 +20,23 @@ class TestProfiler(unittest.TestCase):
y = float(rep[-2][4]) y = float(rep[-2][4])
assert abs(x-y)/x < 1e-3 assert abs(x-y)/x < 1e-3
def test_marks(self):
a = jt.rand(1000,1000)
b = jt.rand(1000,1000)
jt.sync_all()
results = []
with jt.profile_scope() as rep:
results.append(jt.matmul(a, b))
with jt.profile_mark("mark1"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark2"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark3"):
results.append(jt.matmul(a, b))
results.append(jt.matmul(a, b))
jt.sync_all()
assert len(rep) == 6
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()