add profiler mark for easy range profiling

This commit is contained in:
Dun Liang 2022-11-16 16:05:27 +08:00
parent 5bc160b19c
commit ec2eef1fd9
5 changed files with 138 additions and 7 deletions

View File

@ -9,7 +9,7 @@
# file 'LICENSE.txt', which is part of this source code package.
# ***************************************************************
__version__ = '1.3.5.33'
__version__ = '1.3.5.34'
from jittor_utils import lock
with lock.lock_scope():
ori_int = int
@ -234,6 +234,52 @@ class profile_scope(_call_no_record_scope):
self.report.extend(profiler.report())
self.fs.__exit__(*exc)
class profile_mark(_call_no_record_scope):
def __init__(self, mark_name: str):
''' profiler mark is used for profiling part of code,
Example::
a = jt.rand(1000,1000)
b = jt.rand(1000,1000)
jt.sync_all()
results = []
with jt.profile_scope() as rep:
results.append(jt.matmul(a, b))
with jt.profile_mark("mark1"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark2"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark3"):
results.append(jt.matmul(a, b))
results.append(jt.matmul(a, b))
Output::
Total time: 46.8ms
Total Memory Access: 57.2MB
[Mark mark3] time: 9ms
[Mark mark2] time: 8.28ms
[Mark mark1] time: 17.7ms
'''
self.mark_name = mark_name
def __enter__(self):
self.options = flags.compile_options
new_options = flags.compile_options
prev_marks = "_marks:"
for x in self.options:
if x.startswith(prev_marks):
prev_marks = x
del new_options[x]
new_marks = prev_marks + self.mark_name + ','
new_options[new_marks] = 1
flags.compile_options = new_options
def __exit__(self, *exc):
flags.compile_options = self.options
class __single_process_scope:
def __init__(self, rank=0):
self.rank = rank

View File

@ -202,9 +202,11 @@ void FusedOp::do_jit_prepare(JK& jk) {
}
}
jk << "«choices:";
for (auto& kv : *loop_options)
for (auto& kv : *loop_options) {
if (kv.first.size() && kv.first[0] != '_')
jk << kv.first << ':' << kv.second << ',';
}
}
jk.finilize();
}

View File

@ -24,6 +24,7 @@
#include "profiler/memory_checker.h"
#include "misc/deleter.h"
#include "executor.h"
#include "utils/str_utils.h"
namespace jittor {
@ -55,6 +56,7 @@ void Profiler::start(int64 warmup, int64 rerun) {
if (rerun==0) rerun = profiler_rerun;
profiler_enable = 1;
profiler.records.clear();
profiler.marks.clear();
profiler.warmup = warmup;
profiler.rerun = rerun;
profiler.relay_extra_cost = 0;
@ -197,6 +199,34 @@ struct RecordExtraCost {
}
};
static string get_marks(Op* op, bool is_fused) {
loop_options_t* options = nullptr;
if (is_fused) {
auto* fop = (FusedOp*)op;
options = fop->loop_options;
} else {
if (op->outputs().size()) {
if (op->outputs().front()->loop_options)
options = &op->outputs().front()->loop_options.data();
}
}
if (!options) return string();
for (auto& kv : *options) {
if (startswith(kv.first, "_marks:")) {
return kv.first.substr(7);
}
}
return string();
}
static string origin_key(const string& s) {
if (s.size() && s[0]=='[') {
auto vs = split(s, "]");
return vs[vs.size()-1];
}
return s;
}
void Profiler::record_and_run(
jit_op_entry_t jit_entry,
Op* op,
@ -208,6 +238,18 @@ void Profiler::record_and_run(
auto ikey=jit_key_mapper.find(jit_key);
const char* key = ikey==jit_key_mapper.end() ?
jit_key : ikey->second.c_str();
bool is_fused = op->name() == string("fused");
string marks = get_marks(op, is_fused);
string new_key;
if (marks.size()) {
// add marks into key, for better report
new_key = string("[marks:");
new_key += marks;
new_key += "]";
new_key += key;
key = new_key.c_str();
}
auto iter = profiler.records.find(key);
uint64_t in, out, compute;
if (profiler.relay_fop)
@ -224,7 +266,6 @@ void Profiler::record_and_run(
iter->second.stack_info = get_stack_info(op);
}
}
bool is_fused = op->name() == string("fused");
uint64* shape_time = nullptr;
if (trace_py_var || profiler_record_shape) {
@ -307,13 +348,27 @@ void Profiler::record_and_run(
iter->second.update(rerun, total_ns, in, out, compute);
if (shape_time) shape_time[0] += total_ns;
// add markers record
if ((profiler.relay_fop == op || profiler.relay_fop == nullptr)
&& marks.size()) {
// only record not relay op
auto vs = split(marks, ",");
for (auto& mark : vs) {
if (mark.size()) {
auto& mark_info = profiler.marks[mark];
mark_info.count += 1;
mark_info.time_total += total_ns;
}
}
}
RecordExtraCost rec(profiler.relay_fop && profiler.relay_fop != op);
if (profiler_record_peek)
stat_peek_bandwidth(in, out, rerun, iter->second.peek_time_total);
LOGvvvv << "Duration" << total_ns >> "ns running" << op;
if (is_fused &&
((FusedOp*)op)->get_loop_option("check_cache")) {
auto fname = Op::get_filename_from_jit_key(key, ".so");
auto fname = Op::get_filename_from_jit_key(origin_key(key), ".so");
unique_ptr<MemoryChecker>* mc = load_memory_checker(fname);
iter->second.cache_info.reset(new CacheInfo(mc));
}
@ -337,7 +392,7 @@ vector<vector<string>> Profiler::report(const string& sort_key) {
for (auto& kv : profiler.records) {
auto& kinfo = kv.second;
names.push_back(kv.first);
fnames.push_back(Op::get_filename_from_jit_key(kv.first, ".cc"));
fnames.push_back(Op::get_filename_from_jit_key(origin_key(kv.first), ".cc"));
if (kv.second.stack_info.size()) {
fnames.back() += '\n';
fnames.back() += kv.second.stack_info.c_str();
@ -420,6 +475,11 @@ vector<vector<string>> Profiler::report(const string& sort_key) {
ss << "Total Memory Access:";
output_float(" KMG", 1024, "B", total_mem_access);
ss << '\n';
for (auto& mark : profiler.marks) {
ss << "[Mark " << mark.first << "] time:";
output_float("num ", 1000, "s", mark.second.time_total);
ss << '\n';
}
double cum_time = 0;
for (auto i : order) {
auto& name = names[i];
@ -491,7 +551,7 @@ vector<vector<string>> Profiler::report_cache(const string& sort_key) {
if (!kv.second.cache_info)
continue;
names.push_back(kv.first);
fnames.push_back(Op::get_filename_from_jit_key(kv.first, ".cc"));
fnames.push_back(Op::get_filename_from_jit_key(origin_key(kv.first), ".cc"));
CacheInfo& kinfo = *kv.second.cache_info;
order.push_back(order.size());
vector<double> one_info = {(double)kinfo.check_times, ((double)kinfo.tlb_miss_times) / kinfo.check_times};

View File

@ -61,6 +61,11 @@ struct Profiler {
int64 relay_extra_cost;
FusedOp* relay_fop;
struct MarkInfo {
uint64_t count, time_total;
};
unordered_map<string, Info> marks;
~Profiler();
};

View File

@ -20,5 +20,23 @@ class TestProfiler(unittest.TestCase):
y = float(rep[-2][4])
assert abs(x-y)/x < 1e-3
def test_marks(self):
a = jt.rand(1000,1000)
b = jt.rand(1000,1000)
jt.sync_all()
results = []
with jt.profile_scope() as rep:
results.append(jt.matmul(a, b))
with jt.profile_mark("mark1"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark2"):
results.append(jt.matmul(a, b))
with jt.profile_mark("mark3"):
results.append(jt.matmul(a, b))
results.append(jt.matmul(a, b))
jt.sync_all()
assert len(rep) == 6
if __name__ == "__main__":
unittest.main()