From a1c4785a6be9319849267c7b670af1a792ff6716 Mon Sep 17 00:00:00 2001
From: cxjyxx_me <498731903@qq.com>
Date: Mon, 28 Dec 2020 16:13:35 +0800
Subject: [PATCH] memory_profiler

---
 python/jittor/misc.py                      |   2 +-
 python/jittor/test/test_memory_profiler.py |  87 ++++++++++++++++
 src/executor.cc                            |   3 +
 src/memory_profiler.cc                     | 117 +++++++++++++++++++++
 src/memory_profiler.h                      |  39 ++++++++
 5 files changed, 247 insertions(+), 1 deletion(-)
 create mode 100644 python/jittor/test/test_memory_profiler.py
 create mode 100644 src/memory_profiler.cc
 create mode 100644 src/memory_profiler.h

diff --git a/python/jittor/misc.py b/python/jittor/misc.py
index 1fd70faf..6e9d8292 100644
--- a/python/jittor/misc.py
+++ b/python/jittor/misc.py
@@ -77,7 +77,7 @@ def repeat(x, *shape):
         x = x.broadcast(x_shape)
     elif len_x_shape > len_shape:
         rep_shape = (len_x_shape - len_shape) * [1] + shape
-
+    #TODO: if input.shape[i] == 1, do not add [1]
     reshape_shape = []
     broadcast_shape = []
     for x_s,r_s in zip(x_shape,rep_shape):
diff --git a/python/jittor/test/test_memory_profiler.py b/python/jittor/test/test_memory_profiler.py
new file mode 100644
index 00000000..f19d01bb
--- /dev/null
+++ b/python/jittor/test/test_memory_profiler.py
@@ -0,0 +1,87 @@
+# ***************************************************************
+# Copyright (c) 2020 Jittor. Authors:
+#     Guowei Yang <471184555@qq.com>
+#     Meng-Hao Guo <guomenghao1997@gmail.com>
+#     Dun Liang <randonlang@gmail.com>.
+# All Rights Reserved.
+# This file is subject to the terms and conditions defined in
+# file 'LICENSE.txt', which is part of this source code package.
+# ***************************************************************
+import jittor as jt
+from jittor import nn, Module
+from jittor.models import resnet
+import numpy as np
+import sys, os
+import random
+import math
+import unittest
+from jittor.test.test_reorder_tuner import simple_parser
+from jittor.test.test_log import find_log_with_re
+from jittor.dataset.mnist import MNIST
+import jittor.transform as trans
+import time
+
+skip_this_test = False
+
+class MnistNet(Module):
+    def __init__(self):
+        self.model = resnet.Resnet18()
+        self.layer = nn.Linear(1000,10)
+    def execute(self, x):
+        x = self.model(x)
+        x = self.layer(x)
+        return x
+
+@unittest.skipIf(skip_this_test, "skip_this_test")
+class TestMemoryProfiler(unittest.TestCase):
+    @classmethod
+    def setUpClass(self):
+        # hyper-parameters
+        self.batch_size = 100
+        self.weight_decay = 0.0001
+        self.momentum = 0.9
+        self.learning_rate = 0.1
+        # mnist dataset
+        self.train_loader = MNIST(train=True, transform=trans.Resize(224)) \
+            .set_attrs(batch_size=self.batch_size, shuffle=True)
+        self.train_loader.num_workers = 4
+
+    # setup random seed
+    def setup_seed(self, seed):
+        np.random.seed(seed)
+        random.seed(seed)
+        jt.seed(seed)
+
+    @unittest.skipIf(not jt.has_cuda, "Cuda not found")
+    @jt.flag_scope(use_cuda=1, use_stat_allocator=1, trace_py_var=2)
+    def test_resnet(self):
+        self.setup_seed(1)
+        loss_list=[]
+        acc_list=[]
+        mnist_net = MnistNet()
+        global prev
+        prev = time.time()
+        SGD = nn.SGD(mnist_net.parameters(), self.learning_rate, self.momentum, self.weight_decay)
+
+        iters = 50
+        for batch_idx, (data, target) in enumerate(self.train_loader):
+            if (batch_idx > iters):
+                break
+            jt.display_memory_info()
+            output = mnist_net(data)
+            loss = nn.cross_entropy_loss(output, target)
+            SGD.step(loss)
+            def callback(batch_idx, loss, output, target):
+                global prev
+                pred = np.argmax(output, axis=1)
+                acc = np.mean(target==pred)
+                loss_list.append(loss[0])
+                acc_list.append(acc)
+                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {:.6f} \tTime:{:.3f}'
+                    .format(0, batch_idx, iters,1. * batch_idx / 6.0, loss[0], acc, time.time()-prev))
+            jt.fetch(batch_idx, loss, output, target, callback)
+        jt.sync_all(True)
+        jt.display_max_memory_info()
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/src/executor.cc b/src/executor.cc
index 9c53c194..ea77c25b 100644
--- a/src/executor.cc
+++ b/src/executor.cc
@@ -23,10 +23,12 @@
 #include "profiler/profiler_guard.h"
 #include "parallel_compiler.h"
 #include "misc/nan_checker.h"
+#include "memory_profiler.h"
 
 namespace jittor {
 
 Executor exe;
+extern MemoryProfiler memory_profiler;
 
 // from fetch_op.cc
 extern list<VarPtr> fetcher_to_free;
@@ -420,6 +422,7 @@ void Executor::run_sync(vector<Var*> vars, bool device_sync) {
         for (auto* var : op->outputs()) {
             var->alloc(allocator);
         }
+        memory_profiler.check();
         LOGvvv << "Run" << op << "inputs:" << op->inputs() << "outputs:" << op->outputs();
         op->do_prepare(jkl);
         bool is_cuda = op->flags.get(NodeFlags::_cuda);
diff --git a/src/memory_profiler.cc b/src/memory_profiler.cc
new file mode 100644
index 00000000..1324841c
--- /dev/null
+++ b/src/memory_profiler.cc
@@ -0,0 +1,117 @@
+#include "memory_profiler.h"
+#include "graph.h"
+#include "var_holder.h"
+#include "var.h"
+#include "mem/allocator/sfrl_allocator.h"
+#include <iostream>
+#include <sstream>
+#include <iomanip>
+#include <algorithm>
+
+namespace jittor {
+
+//TODO: reuse FloatOutput from mem_info.cc
+struct FloatOutput_ {
+    double value;
+    string scale;
+    int base;
+    string suffix;
+    int p=4;
+};
+std::ostream& operator<<(std::ostream& os, const FloatOutput_& o) {
+    int w = 8;
+    os << std::setw(w-2-o.suffix.size());
+    os << std::setprecision(o.p);
+    uint i=0;
+    double k = o.value;
+    for (; i+1<o.scale.size(); i++) {
+        if (k<o.base) break;
+        k /= o.base;
+    }
+    os << k << o.scale[i];
+    return os << o.suffix;
+}
+
+MemoryProfiler memory_profiler;
+DEFINE_FLAG(int, profile_memory_enable, 0, "Enable memory profiler.");
+
+MemoryProfiler::MemoryProfiler() {
+    clear();
+}
+
+void MemoryProfiler::clear() {
+    allocations.clear();
+    max_memory_size = 0;
+    max_used_memory_size = 0;
+}
+
+std::pair<size_t, size_t> MemoryProfiler::get_memory_info() {
+    size_t used = 0;
+    size_t unused = 0;
+    //TODO: also count the mssfrl allocator
+    for (auto& a : SFRLAllocator::sfrl_allocators) {
+        used += a->used_memory;
+        unused += a->unused_memory;
+    }
+    return std::make_pair(used, unused);
+}
+
+void MemoryProfiler::check() {
+    std::pair<size_t, size_t> mem_info = get_memory_info();
+    if (mem_info.first > max_used_memory_size) {
+        max_used_memory_size = mem_info.first;
+
+        allocations.clear();
+        size_t memory_size = 0;
+        vector<std::pair<std::string, size_t>> live_vars;
+        vector<Node*> queue;
+
+        auto t = ++Node::tflag_count;
+        for (auto& vh : VarHolder::hold_vars)
+            if (vh->var->tflag != t) {
+                vh->var->tflag = t;
+                queue.push_back(vh->var);
+            }
+        bfs_both(queue, [](Node*){return true;});
+        for (Node* node : queue) {
+            if (node->is_var()) {
+                Var* var = (Var*)node;
+                if (var->mem_ptr != nullptr) {
+                    std::stringstream stream;
+                    stream << var;
+                    live_vars.push_back(std::make_pair(stream.str(), var->size));
+                    if (!allocations.count(var->mem_ptr)) {
+                        allocations[var->mem_ptr] = 1;
+                        memory_size += var->size;
+                    }
+                }
+            }
+        }
+        max_live_vars = live_vars;
+        max_memory_size = memory_size;
+    }
+}
+
+bool MemoryProfiler::cmp(const std::pair<std::string, size_t>& a, const std::pair<std::string, size_t>& b) {
+    return a.second > b.second;
+}
+
+void MemoryProfiler::display_max_memory_info() {
+    Log log("", 'i', 0);
+    std::sort(max_live_vars.begin(), max_live_vars.end(), cmp);
+    log << "\n=====display_max_memory_info=====\n";
+    log << "max used memory" << FloatOutput_{(double)max_used_memory_size, " KMG", 1024, "B"} << "\n";
+    log << "max var memory" << FloatOutput_{(double)max_memory_size, " KMG", 1024, "B"} << "\n\n";
+    log << "[Size]" << "[Percent]" << "[Var Info]" << "\n";
+    for (int i = 0; i < max_live_vars.size(); ++i) {
+        log << FloatOutput_{(double)max_live_vars[i].second, " KMG", 1024, "B"} << double(max_live_vars[i].second) / max_memory_size * 100 << "%" << max_live_vars[i].first << "\n\n";
+    }
+    log << "=========================\n";
+    log.end();
+}
+
+void display_max_memory_info() {
+    memory_profiler.display_max_memory_info();
+}
+
+} // jittor
\ No newline at end of file
diff --git a/src/memory_profiler.h b/src/memory_profiler.h
new file mode 100644
index 00000000..b6cd2d88
--- /dev/null
+++ b/src/memory_profiler.h
@@ -0,0 +1,39 @@
+// ***************************************************************
+// Copyright (c) 2020 Jittor. All Rights Reserved.
+// Authors: Dun Liang <randonlang@gmail.com>.
+// This file is subject to the terms and conditions defined in
+// file 'LICENSE.txt', which is part of this source code package.
+// ***************************************************************
+#pragma once
+#include "common.h"
+#include "mem/allocator.h"
+#include <map>
+#include <vector>
+#include <string>
+#include "var.h"
+namespace jittor {
+
+// @pyjt(display_max_memory_info)
+void display_max_memory_info();
+
+struct MemoryProfiler {
+    std::map<void*, size_t> allocations;
+    // Peak-memory info
+    std::vector<std::pair<std::string, size_t>> max_live_vars;
+    size_t max_used_memory_size;
+    size_t max_memory_size;
+
+
+    MemoryProfiler();
+    static bool cmp(const std::pair<std::string, size_t>& a, const std::pair<std::string, size_t>& b);
+    void clear();
+    void check();
+    std::pair<size_t, size_t> get_memory_info();
+    void display_max_memory_info();
+};
+
+extern MemoryProfiler memory_profiler;
+
+DECLARE_FLAG(int, profile_memory_enable);
+
+} // jittor
\ No newline at end of file
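
Usage sketch (not part of the diff): the test above exercises the profiler through a full
ResNet-18 run; below is a minimal standalone sketch assuming only the APIs that the patch
and test already reference (jt.flag_scope, jt.display_memory_info, jt.sync_all, and the new
jt.display_max_memory_info). The toy Linear model, random input, and loop count are
illustrative, not part of the patch.

    import jittor as jt
    from jittor import nn

    # Any workload works; a tiny linear layer keeps the sketch self-contained.
    model = nn.Linear(1000, 10)

    with jt.flag_scope(use_stat_allocator=1, trace_py_var=2):
        for _ in range(10):
            x = jt.random((100, 1000))
            y = model(x)
            y.sync()                      # force execution so the executor calls check()
            jt.display_memory_info()      # per-step live memory, as in the test
        jt.sync_all(True)
        # Peak report: max used memory, max var memory, and the live vars recorded by
        # MemoryProfiler::check() at the high-water mark, sorted by size.
        jt.display_max_memory_info()

Because check() re-snapshots the live vars only when allocator usage reaches a new peak,
the final report describes the high-water mark of the run rather than its final state.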